-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Don't predicate divs with invariant divisor when folding tail #98904
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b6ab904
1782e6c
cb18fd6
d834270
f00467f
f0b1ea4
92e0a77
e7e7564
1574791
2c8de7c
8a71ea7
d1bdb1e
edee437
f9581e0
49c6677
bfe0e34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -3339,45 +3339,54 @@ bool LoopVectorizationCostModel::isScalarWithPredication( | |||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
// TODO: Fold into LoopVectorizationLegality::isMaskRequired. | ||||||||||||
bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const { | ||||||||||||
if (!blockNeedsPredicationForAnyReason(I->getParent())) | ||||||||||||
// If predication is not needed, avoid it. | ||||||||||||
// TODO: We can use the loop-preheader as context point here and get | ||||||||||||
// context sensitive reasoning for isSafeToSpeculativelyExecute. | ||||||||||||
if (!blockNeedsPredicationForAnyReason(I->getParent()) || | ||||||||||||
isSafeToSpeculativelyExecute(I) || | ||||||||||||
(isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) || | ||||||||||||
isa<BranchInst, PHINode>(I)) | ||||||||||||
return false; | ||||||||||||
|
||||||||||||
// Can we prove this instruction is safe to unconditionally execute? | ||||||||||||
// If not, we must use some form of predication. | ||||||||||||
// If the instruction was executed conditionally in the original scalar loop, | ||||||||||||
// predication is needed with a mask whose lanes are all possibly inactive. | ||||||||||||
if (Legal->blockNeedsPredication(I->getParent())) | ||||||||||||
return true; | ||||||||||||
|
||||||||||||
// All that remain are instructions with side-effects originally executed in | ||||||||||||
// the loop unconditionally, but now execute under a tail-fold mask (only) | ||||||||||||
// having at least one active lane (the first). If the side-effects of the | ||||||||||||
// instruction are invariant, executing it w/o (the tail-folding) mask is safe | ||||||||||||
// - it will cause the same side-effects as when masked. | ||||||||||||
switch(I->getOpcode()) { | ||||||||||||
default: | ||||||||||||
return false; | ||||||||||||
llvm_unreachable( | ||||||||||||
"instruction should have been considered by earlier checks"); | ||||||||||||
case Instruction::Call: | ||||||||||||
// Side-effects of a Call are assumed to be non-invariant, needing a | ||||||||||||
// (fold-tail) mask. | ||||||||||||
assert(Legal->isMaskRequired(I) && | ||||||||||||
"should have returned earlier for calls not needing a mask"); | ||||||||||||
return true; | ||||||||||||
case Instruction::Load: | ||||||||||||
// If the address is loop invariant no predication is needed. | ||||||||||||
return !Legal->isInvariant(getLoadStorePointerOperand(I)); | ||||||||||||
case Instruction::Store: { | ||||||||||||
if (!Legal->isMaskRequired(I)) | ||||||||||||
return false; | ||||||||||||
// When we know the load's address is loop invariant and the instruction | ||||||||||||
// in the original scalar loop was unconditionally executed then we | ||||||||||||
// don't need to mark it as a predicated instruction. Tail folding may | ||||||||||||
// introduce additional predication, but we're guaranteed to always have | ||||||||||||
// at least one active lane. We call Legal->blockNeedsPredication here | ||||||||||||
// because it doesn't query tail-folding. For stores, we need to prove | ||||||||||||
// both speculation safety (which follows from the same argument as loads), | ||||||||||||
// but also must prove the value being stored is correct. The easiest | ||||||||||||
// form of the latter is to require that all values stored are the same. | ||||||||||||
if (Legal->isInvariant(getLoadStorePointerOperand(I)) && | ||||||||||||
(isa<LoadInst>(I) || | ||||||||||||
(isa<StoreInst>(I) && | ||||||||||||
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) && | ||||||||||||
!Legal->blockNeedsPredication(I->getParent())) | ||||||||||||
return false; | ||||||||||||
return true; | ||||||||||||
// For stores, we need to prove both speculation safety (which follows from | ||||||||||||
// the same argument as loads), but also must prove the value being stored | ||||||||||||
// is correct. The easiest form of the latter is to require that all values | ||||||||||||
// stored are the same. | ||||||||||||
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) && | ||||||||||||
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand())); | ||||||||||||
} | ||||||||||||
case Instruction::UDiv: | ||||||||||||
case Instruction::SDiv: | ||||||||||||
case Instruction::SRem: | ||||||||||||
case Instruction::URem: | ||||||||||||
// TODO: We can use the loop-preheader as context point here and get | ||||||||||||
// context sensitive reasoning | ||||||||||||
return !isSafeToSpeculativelyExecute(I); | ||||||||||||
case Instruction::Call: | ||||||||||||
return Legal->isMaskRequired(I); | ||||||||||||
// If the divisor is loop-invariant no predication is needed. | ||||||||||||
return !TheLoop->isLoopInvariant(I->getOperand(1)); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -274,50 +274,38 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { | |
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() | ||
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 | ||
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP19]] | ||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0 | ||
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0 | ||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0 | ||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_1_I]], i64 0 | ||
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[X]], i64 0 | ||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0 | ||
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] | ||
; CHECK: [[VECTOR_BODY]]: | ||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[TMP22:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[TMP21]] | ||
; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[TMP24:%.*]] = urem <vscale x 2 x i64> [[VEC_IND]], [[TMP23]] | ||
; CHECK-NEXT: [[TMP25:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[TMP26:%.*]] = udiv <vscale x 2 x i64> [[TMP24]], [[TMP25]] | ||
; CHECK-NEXT: [[TMP27:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[TMP28:%.*]] = urem <vscale x 2 x i64> [[TMP24]], [[TMP27]] | ||
; CHECK-NEXT: [[TMP29:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[BROADCAST_SPLAT4]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[TMP30:%.*]] = udiv <vscale x 2 x i64> [[TMP28]], [[TMP29]] | ||
; CHECK-NEXT: [[TMP31:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> [[BROADCAST_SPLAT4]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[TMP32:%.*]] = urem <vscale x 2 x i64> [[TMP28]], [[TMP31]] | ||
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <vscale x 2 x i64> [[TMP22]], i32 0 | ||
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[X]], [[TMP33]] | ||
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <vscale x 2 x i64> [[TMP26]], i32 0 | ||
; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP34]], [[TMP35]] | ||
; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[TMP36]], [[X]] | ||
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <vscale x 2 x i64> [[TMP30]], i32 0 | ||
; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[TMP37]], [[TMP38]] | ||
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], [[X]] | ||
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <vscale x 2 x i64> [[TMP32]], i32 0 | ||
; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP40]], [[TMP41]] | ||
; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 32 | ||
; CHECK-NEXT: [[TMP44:%.*]] = ashr i64 [[TMP43]], 32 | ||
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP44]] | ||
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i64, ptr [[TMP45]], i32 0 | ||
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP22]], ptr [[TMP46]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) | ||
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 | ||
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems right - broadcasting the invariant divisors rather than filling poison for masked lanes, or computing a single unmasked lane if only it is used. |
||
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]] | ||
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]] | ||
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]] | ||
; CHECK-NEXT: [[TMP27:%.*]] = udiv i64 [[TMP26]], [[X]] | ||
; CHECK-NEXT: [[TMP28:%.*]] = urem i64 [[TMP26]], [[X]] | ||
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i64> [[TMP23]], i32 0 | ||
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[X]], [[TMP29]] | ||
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], [[TMP25]] | ||
; CHECK-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], [[X]] | ||
; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[TMP27]] | ||
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], [[X]] | ||
; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[TMP34]], [[TMP28]] | ||
; CHECK-NEXT: [[TMP36:%.*]] = shl i64 [[TMP35]], 32 | ||
; CHECK-NEXT: [[TMP37:%.*]] = ashr i64 [[TMP36]], 32 | ||
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP37]] | ||
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i64, ptr [[TMP38]], i32 0 | ||
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr [[TMP39]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] | ||
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) | ||
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) | ||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] | ||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] | ||
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0 | ||
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] | ||
; CHECK: [[MIDDLE_BLOCK]]: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -380,41 +380,40 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { | |
; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
; PRED: vector.ph: | ||
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() | ||
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8 | ||
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this change from 8 to 4 intentional, desirable? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The costs of both options are very close together. Previously the need to have a vector select in addition to the vector udiv made the difference in favor of 8. So for the current costs, this seems to be working as intended |
||
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 | ||
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP5]] | ||
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] | ||
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] | ||
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() | ||
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 | ||
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 | ||
; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() | ||
; PRED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 | ||
; PRED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 | ||
; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[TMP9]] | ||
; PRED-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], [[TMP9]] | ||
; PRED-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 | ||
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[TMP0]]) | ||
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[X]], i64 0 | ||
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer | ||
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP0]]) | ||
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0 | ||
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer | ||
; PRED-NEXT: br label [[VECTOR_BODY:%.*]] | ||
; PRED: vector.body: | ||
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; PRED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] | ||
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; PRED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] | ||
; PRED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 | ||
; PRED-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP13]] | ||
; PRED-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[TMP14]], i32 0 | ||
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP15]], i32 2, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i16> poison) | ||
; PRED-NEXT: [[TMP16:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i16> [[BROADCAST_SPLAT]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) | ||
; PRED-NEXT: [[TMP17:%.*]] = udiv <vscale x 8 x i16> [[WIDE_MASKED_LOAD]], [[TMP16]] | ||
; PRED-NEXT: [[TMP18:%.*]] = or <vscale x 8 x i16> [[TMP17]], [[VEC_PHI]] | ||
; PRED-NEXT: [[TMP19]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i16> [[TMP18]], <vscale x 8 x i16> [[VEC_PHI]] | ||
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP15]], i32 2, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i16> poison) | ||
; PRED-NEXT: [[TMP19:%.*]] = udiv <vscale x 4 x i16> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] | ||
; PRED-NEXT: [[TMP20:%.*]] = or <vscale x 4 x i16> [[TMP19]], [[VEC_PHI]] | ||
; PRED-NEXT: [[TMP16]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i16> [[TMP20]], <vscale x 4 x i16> [[VEC_PHI]] | ||
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] | ||
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP12]]) | ||
; PRED-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) | ||
; PRED-NEXT: [[TMP21:%.*]] = extractelement <vscale x 8 x i1> [[TMP20]], i32 0 | ||
; PRED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) | ||
; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) | ||
; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 | ||
; PRED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; PRED: middle.block: | ||
; PRED-NEXT: [[TMP22:%.*]] = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> [[TMP19]]) | ||
; PRED-NEXT: [[TMP22:%.*]] = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> [[TMP16]]) | ||
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] | ||
; PRED: scalar.ph: | ||
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this comment removal intended?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
code restructured in the latest version, with the code separate if being removed.