Skip to content

Commit 632317e

Browse files
authored
[VPlan] Add non-poison propagating LogicalAnd VPInstruction opcode. (#91897)
Add a new opcode to model non-poison propagating logical AND operations used when generating edge masks. This follows the similar decision to model Not as a dedicated opcode as well, to improve clarity. This also helps to simplify the matchers for #89386. PR: #91897
1 parent 346f2b7 commit 632317e

File tree

6 files changed

+35
-20
lines changed

6 files changed

+35
-20
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,12 @@ class VPBuilder {
179179
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
180180
}
181181

182+
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
183+
const Twine &Name = "") {
184+
return tryInsertInstruction(
185+
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
186+
}
187+
182188
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
183189
DebugLoc DL = {}, const Twine &Name = "",
184190
std::optional<FastMathFlags> FMFs = std::nullopt) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8011,14 +8011,13 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
80118011
EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
80128012

80138013
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
8014-
// The condition is 'SrcMask && EdgeMask', which is equivalent to
8015-
// 'select i1 SrcMask, i1 EdgeMask, i1 false'.
8016-
// The select version does not introduce new UB if SrcMask is false and
8017-
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
8018-
VPValue *False = Plan.getOrAddLiveIn(
8019-
ConstantInt::getFalse(BI->getCondition()->getType()));
8020-
EdgeMask =
8021-
Builder.createSelect(SrcMask, EdgeMask, False, BI->getDebugLoc());
8014+
// Use LogicalAnd as it does not propagate poison, i.e. does not introduce
8015+
// new UB if SrcMask is false and EdgeMask is poison. Using 'and' here
8016+
// introduces undefined behavior.
8017+
// The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
8018+
// is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
8019+
// instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
8020+
EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
80228021
}
80238022

80248023
return EdgeMaskCache[Edge] = EdgeMask;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,6 +1177,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
11771177
BranchOnCount,
11781178
BranchOnCond,
11791179
ComputeReductionResult,
1180+
LogicalAnd, // Non-poison propagating logical And.
11801181
// Add an offset in bytes (second operand) to a base pointer (first
11811182
// operand). Only generates scalar values (either for the first lane only or
11821183
// for all lanes, depending on its uses).

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
137137
case VPInstruction::Not:
138138
case VPInstruction::CalculateTripCountMinusVF:
139139
case VPInstruction::CanonicalIVIncrementForPart:
140+
case VPInstruction::LogicalAnd:
140141
case VPInstruction::PtrAdd:
141142
return false;
142143
default:
@@ -557,6 +558,11 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
557558

558559
return ReducedPartRdx;
559560
}
561+
case VPInstruction::LogicalAnd: {
562+
Value *A = State.get(getOperand(0), Part);
563+
Value *B = State.get(getOperand(1), Part);
564+
return Builder.CreateLogicalAnd(A, B, Name);
565+
}
560566
case VPInstruction::PtrAdd: {
561567
assert(vputils::onlyFirstLaneUsed(this) &&
562568
"can only generate first lane for PtrAdd");
@@ -689,6 +695,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
689695
case VPInstruction::ComputeReductionResult:
690696
O << "compute-reduction-result";
691697
break;
698+
case VPInstruction::LogicalAnd:
699+
O << "logical-and";
700+
break;
692701
case VPInstruction::PtrAdd:
693702
O << "ptradd";
694703
break;

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
432432
; CHECK-NEXT: WIDEN ir<%cmp1> = icmp slt ir<%lsd>, ir<100>
433433
; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%cmp1>, !dbg /tmp/s.c:5:3
434434
; CHECK-NEXT: WIDEN ir<%cmp2> = icmp sge ir<%lsd>, ir<200>
435-
; CHECK-NEXT: EMIT vp<[[SEL1:%.+]]> = select vp<[[NOT1]]>, ir<%cmp2>, ir<false>, !dbg /tmp/s.c:5:21
435+
; CHECK-NEXT: EMIT vp<[[SEL1:%.+]]> = logical-and vp<[[NOT1]]>, ir<%cmp2>, !dbg /tmp/s.c:5:21
436436
; CHECK-NEXT: EMIT vp<[[OR1:%.+]]> = or vp<[[SEL1]]>, ir<%cmp1>
437437
; CHECK-NEXT: Successor(s): pred.sdiv
438438
; CHECK-EMPTY:
@@ -453,7 +453,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
453453
; CHECK-EMPTY:
454454
; CHECK-NEXT: if.then.0:
455455
; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp2>
456-
; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = select vp<[[NOT1]]>, vp<[[NOT2]]>, ir<false>
456+
; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = logical-and vp<[[NOT1]]>, vp<[[NOT2]]>
457457
; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]> ir<%psd>/vp<[[SEL2]]>
458458
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd>
459459
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0>

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) {
269269
; CHECK-NEXT: CLONE ir<%lv> = load ir<%gep.A.uniform>
270270
; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%iv>, ir<%k>
271271
; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp>
272-
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK]]>, vp<[[NOT2]]>, ir<false>
272+
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK]]>, vp<[[NOT2]]>
273273
; CHECK-NEXT: Successor(s): pred.store
274274
; CHECK-EMPTY:
275275
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -340,7 +340,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
340340
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
341341
; CHECK-NEXT: WIDEN ir<%c.1> = icmp ult ir<%iv>, ir<%j>
342342
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
343-
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1]]>, ir<%c.1>, ir<false>
343+
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.1>
344344
; CHECK-NEXT: Successor(s): pred.load
345345
; CHECK-EMPTY:
346346
; CHECK-NEXT: <xVFxUF> pred.load: {
@@ -362,7 +362,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
362362
; CHECK-EMPTY:
363363
; CHECK-NEXT: then.0.0:
364364
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.1>
365-
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
365+
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, vp<[[NOT]]>
366366
; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
367367
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
368368
; CHECK-NEXT: Successor(s): pred.store
@@ -441,7 +441,7 @@ define void @pred_cfg2(i32 %k, i32 %j) {
441441
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
442442
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
443443
; CHECK-NEXT: WIDEN ir<%c.1> = icmp ugt ir<%iv>, ir<%j>
444-
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1]]>, ir<%c.0>, ir<false>
444+
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0>
445445
; CHECK-NEXT: Successor(s): pred.load
446446
; CHECK-EMPTY:
447447
; CHECK-NEXT: <xVFxUF> pred.load: {
@@ -463,10 +463,10 @@ define void @pred_cfg2(i32 %k, i32 %j) {
463463
; CHECK-EMPTY:
464464
; CHECK-NEXT: then.0.0:
465465
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0>
466-
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
466+
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, vp<[[NOT]]>
467467
; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
468468
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
469-
; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = select vp<[[OR]]>, ir<%c.1>, ir<false>
469+
; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = logical-and vp<[[OR]]>, ir<%c.1>
470470
; CHECK-NEXT: Successor(s): pred.store
471471
; CHECK-EMPTY:
472472
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -549,7 +549,7 @@ define void @pred_cfg3(i32 %k, i32 %j) {
549549
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
550550
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
551551
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
552-
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1:%.+]]>, ir<%c.0>, ir<false>
552+
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK1:%.+]]>, ir<%c.0>
553553
; CHECK-NEXT: Successor(s): pred.load
554554
; CHECK-EMPTY:
555555
; CHECK-NEXT: <xVFxUF> pred.load: {
@@ -571,10 +571,10 @@ define void @pred_cfg3(i32 %k, i32 %j) {
571571
; CHECK-EMPTY:
572572
; CHECK-NEXT: then.0.0:
573573
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0>
574-
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
574+
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, vp<[[NOT]]>
575575
; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
576576
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
577-
; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = select vp<[[MASK4]]>, ir<%c.0>, ir<false>
577+
; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = logical-and vp<[[MASK4]]>, ir<%c.0>
578578
; CHECK-NEXT: Successor(s): pred.store
579579
; CHECK-EMPTY:
580580
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -683,7 +683,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
683683
; CHECK-EMPTY:
684684
; CHECK-NEXT: loop.3:
685685
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
686-
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK]]>, ir<%c.0>, ir<false>
686+
; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK]]>, ir<%c.0>
687687
; CHECK-NEXT: WIDEN ir<%mul> = mul vp<[[PRED1]]>, vp<[[PRED2]]>
688688
; CHECK-NEXT: Successor(s): pred.store
689689
; CHECK-EMPTY:

0 commit comments

Comments
 (0)