Skip to content

Commit a123bea

Browse files
aqjunetstellar
authored andcommitted
[LoopVectorize] Fix VPRecipeBuilder::createEdgeMask to correctly generate the mask
This patch fixes pr48832 by correctly generating the mask when a poison value is involved. Consider this CFG (which is a part of the input): ``` for.body: ; preds = %for.cond br i1 true, label %cond.false, label %land.rhs land.rhs: ; preds = %for.body br i1 poison, label %cond.end, label %cond.false cond.false: ; preds = %for.body, %land.rhs br label %cond.end cond.end: ; preds = %land.rhs, %cond.false %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ] ``` The path for.body -> land.rhs -> cond.end should be taken when 'select i1 false, i1 poison, i1 false' holds (which means it's never taken); but VPRecipeBuilder::createEdgeMask was emitting 'and i1 false, poison' instead. The former one successfully blocks poison propagation whereas the latter one doesn't, making the condition poison and thus causing the miscompilation. SimplifyCFG has a similar bug (which didn't expose a real-world bug yet), and a patch for this is also ongoing (see https://reviews.llvm.org/D95026). Reviewed By: bjope Differential Revision: https://reviews.llvm.org/D95217 (cherry picked from commit ed253ef)
1 parent f8b3298 commit a123bea

File tree

10 files changed

+93
-42
lines changed

10 files changed

+93
-42
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ class VPBuilder {
142142
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
143143
}
144144

145+
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
146+
return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
147+
}
148+
145149
//===--------------------------------------------------------------------===//
146150
// RAII helpers.
147151
//===--------------------------------------------------------------------===//

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8195,8 +8195,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
81958195
if (BI->getSuccessor(0) != Dst)
81968196
EdgeMask = Builder.createNot(EdgeMask);
81978197

8198-
if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
8199-
EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
8198+
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
8199+
// The condition is 'SrcMask && EdgeMask', which is equivalent to
8200+
// 'select i1 SrcMask, i1 EdgeMask, i1 false'.
8201+
// The select version does not introduce new UB if SrcMask is false and
8202+
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
8203+
VPValue *False = Plan->getOrAddVPValue(
8204+
ConstantInt::getFalse(BI->getCondition()->getType()));
8205+
EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
8206+
}
82008207

82018208
return EdgeMaskCache[Edge] = EdgeMask;
82028209
}

llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,10 +2042,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
20422042
; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
20432043
; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
20442044
; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
2045-
; AVX1-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
2046-
; AVX1-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
2047-
; AVX1-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
2048-
; AVX1-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
2045+
; AVX1-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
2046+
; AVX1-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
2047+
; AVX1-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
2048+
; AVX1-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
20492049
; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
20502050
; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
20512051
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2166,10 +2166,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
21662166
; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
21672167
; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
21682168
; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
2169-
; AVX2-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
2170-
; AVX2-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
2171-
; AVX2-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
2172-
; AVX2-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
2169+
; AVX2-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
2170+
; AVX2-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
2171+
; AVX2-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
2172+
; AVX2-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
21732173
; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
21742174
; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
21752175
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2290,10 +2290,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
22902290
; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
22912291
; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
22922292
; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
2293-
; AVX512-NEXT: [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]]
2294-
; AVX512-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]]
2295-
; AVX512-NEXT: [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]]
2296-
; AVX512-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]]
2293+
; AVX512-NEXT: [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer
2294+
; AVX512-NEXT: [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer
2295+
; AVX512-NEXT: [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer
2296+
; AVX512-NEXT: [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer
22972297
; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
22982298
; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
22992299
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])
@@ -2459,10 +2459,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
24592459
; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
24602460
; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
24612461
; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
2462-
; AVX1-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
2463-
; AVX1-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
2464-
; AVX1-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
2465-
; AVX1-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
2462+
; AVX1-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
2463+
; AVX1-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
2464+
; AVX1-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
2465+
; AVX1-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
24662466
; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
24672467
; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
24682468
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2583,10 +2583,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
25832583
; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
25842584
; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
25852585
; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
2586-
; AVX2-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
2587-
; AVX2-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
2588-
; AVX2-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
2589-
; AVX2-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
2586+
; AVX2-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
2587+
; AVX2-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
2588+
; AVX2-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
2589+
; AVX2-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
25902590
; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
25912591
; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
25922592
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2707,10 +2707,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
27072707
; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
27082708
; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
27092709
; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
2710-
; AVX512-NEXT: [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]]
2711-
; AVX512-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]]
2712-
; AVX512-NEXT: [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]]
2713-
; AVX512-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]]
2710+
; AVX512-NEXT: [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer
2711+
; AVX512-NEXT: [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer
2712+
; AVX512-NEXT: [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer
2713+
; AVX512-NEXT: [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer
27142714
; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
27152715
; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
27162716
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])

llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
408408
; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
409409
; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
410410
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
411-
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
411+
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
412412
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
413413
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
414414
; DISABLED_MASKED_STRIDED: pred.load.if:
@@ -520,7 +520,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
520520
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
521521
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1
522522
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
523-
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
523+
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
524524
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
525525
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
526526
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
@@ -615,7 +615,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
615615
; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
616616
; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
617617
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
618-
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
618+
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
619619
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
620620
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
621621
; DISABLED_MASKED_STRIDED: pred.load.if:
@@ -727,7 +727,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
727727
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
728728
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw i32 [[INDEX]], 3
729729
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
730-
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
730+
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
731731
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>*
732732
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
733733
; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
@@ -1535,7 +1535,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
15351535
; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
15361536
; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
15371537
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1538-
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
1538+
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
15391539
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
15401540
; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
15411541
; DISABLED_MASKED_STRIDED: pred.load.if:
@@ -1871,7 +1871,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
18711871
; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
18721872
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1
18731873
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
1874-
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
1874+
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
18751875
; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
18761876
; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
18771877
; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)

llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
3939
; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
4040
; CHECK-NEXT: [[TMP12:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4>
4141
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
42-
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP11]], [[TMP10]]
42+
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP10]], [[TMP11]]
4343
; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
4444
; CHECK-NEXT: [[TMP16:%.*]] = and <4 x i1> [[TMP10]], [[TMP15]]
4545
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>

llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ for.cond.cleanup: ; preds = %if.end
161161
; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], <i32 100, i32 100>
162162
; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], <i32 200, i32 200>
163163
; CHECK: %[[NOT:.+]] = xor <2 x i1> %[[CMP1]], <i1 true, i1 true>
164-
; CHECK: %[[AND:.+]] = and <2 x i1> %[[CMP2]], %[[NOT]]
164+
; CHECK: %[[AND:.+]] = select <2 x i1> %[[NOT]], <2 x i1> %[[CMP2]], <2 x i1> zeroinitializer
165165
; CHECK: %[[OR:.+]] = or <2 x i1> %[[AND]], %[[CMP1]]
166166
; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR]], i32 0
167167
; CHECK: br i1 %[[EXTRACT]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]

llvm/test/Transforms/LoopVectorize/if-reduction.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -610,9 +610,9 @@ for.end: ; preds = %for.body, %entry
610610
; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], <float 3.000000e+00,
611611
; CHECK-DAG: %[[M2:.*]] = fmul fast <4 x float> %[[V0]], <float 2.000000e+00,
612612
; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
613-
; CHECK-DAG: %[[C12:.*]] = and <4 x i1> %[[C2]], %[[C11]]
613+
; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
614614
; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
615-
; CHECK: %[[C22:.*]] = and <4 x i1> %[[C21]], %[[C11]]
615+
; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
616616
; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
617617
; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
618618
; CHECK: fadd fast <4 x float> %[[S2]],
@@ -678,9 +678,9 @@ for.end: ; preds = %for.inc, %entry
678678
; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float>
679679
; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float>
680680
; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
681-
; CHECK-DAG: %[[C12:.*]] = and <4 x i1> %[[C2]], %[[C11]]
681+
; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
682682
; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
683-
; CHECK: %[[C22:.*]] = and <4 x i1> %[[C21]], %[[C11]]
683+
; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
684684
; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
685685
; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
686686
define float @fcmp_fadd_fsub(float* nocapture readonly %a, i32 %n) nounwind readonly {

0 commit comments

Comments
 (0)