Skip to content

Commit 8d29ae5

Browse files
committed
Emit intrinsic instead
1 parent 8eb30fe commit 8d29ae5

File tree

3 files changed

+56
-83
lines changed

3 files changed

+56
-83
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3628,28 +3628,18 @@ void VPAliasLaneMaskRecipe::execute(VPTransformState &State) {
36283628
Value *SinkValue = State.get(getSinkValue(), true);
36293629
Value *SourceValue = State.get(getSourceValue(), true);
36303630

3631-
Value *Diff = Builder.CreateSub(SourceValue, SinkValue, "sub.diff");
3632-
auto *Type = Diff->getType();
3633-
Value *Zero = ConstantInt::get(Type, 0);
3634-
if (!WriteAfterRead)
3635-
Diff = Builder.CreateIntrinsic(
3636-
Intrinsic::abs, {Type},
3637-
{Diff, ConstantInt::getFalse(Builder.getInt1Ty())}, nullptr, "sub.abs");
3638-
3639-
Value *DiffDiv = Builder.CreateSDiv(Diff, Zero, "diff");
3640-
// If the difference is positive then some elements may alias
3641-
auto CmpCode = WriteAfterRead ? CmpInst::Predicate::ICMP_SLE
3642-
: CmpInst::Predicate::ICMP_EQ;
3643-
Value *Cmp = Builder.CreateICmp(CmpCode, DiffDiv, Zero, "neg.compare");
3644-
3645-
// Splat the compare result then OR it with a lane mask
3646-
Value *Splat = Builder.CreateVectorSplat(State.VF, Cmp);
3647-
Value *DiffMask = Builder.CreateIntrinsic(
3648-
Intrinsic::get_active_lane_mask,
3649-
{VectorType::get(Builder.getInt1Ty(), State.VF), Type}, {Zero, DiffDiv},
3650-
nullptr, "ptr.diff.lane.mask");
3651-
Value *Or = Builder.CreateBinOp(Instruction::BinaryOps::Or, DiffMask, Splat);
3652-
State.set(this, Or, /*IsScalar=*/false);
3631+
unsigned IntrinsicID = WriteAfterRead
3632+
? Intrinsic::experimental_loop_dependence_war_mask
3633+
: Intrinsic::experimental_loop_dependence_raw_mask;
3634+
Value *SourceAsPtr = Builder.CreateCast(Instruction::IntToPtr, SourceValue,
3635+
Builder.getPtrTy());
3636+
Value *SinkAsPtr =
3637+
Builder.CreateCast(Instruction::IntToPtr, SinkValue, Builder.getPtrTy());
3638+
Value *AliasMask = Builder.CreateIntrinsic(
3639+
IntrinsicID, {VectorType::get(Builder.getInt1Ty(), State.VF)},
3640+
{SourceAsPtr, SinkAsPtr, Builder.getInt64(getAccessedElementSize())},
3641+
nullptr, "alias.lane.mask");
3642+
State.set(this, AliasMask, /*IsScalar=*/false);
36533643
}
36543644

36553645
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/test/Transforms/LoopVectorize/AArch64/alias_mask.ll

Lines changed: 41 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -5,62 +5,56 @@ define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {
55
; CHECK-LABEL: define dso_local void @alias_mask(
66
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[B4:%.*]] = ptrtoint ptr [[B]] to i64
9-
; CHECK-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
108
; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
119
; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C]] to i64
10+
; CHECK-NEXT: [[B3:%.*]] = ptrtoint ptr [[B]] to i64
11+
; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64
1212
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[N]], 0
1313
; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
1414
; CHECK: for.body.preheader:
1515
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1616
; CHECK: vector.memcheck:
1717
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1818
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
19-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[C1]], [[B2]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[C2]], [[B3]]
2020
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
2121
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
2222
; CHECK: vector.ph:
23-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
25-
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
23+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
24+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP7]], 16
25+
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP6]], 1
2626
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP5]]
27-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
27+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
2828
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
29-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
30-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16
31-
; CHECK-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[B4]], [[C3]]
32-
; CHECK-NEXT: [[DIFF:%.*]] = sdiv i64 [[SUB_DIFF]], 0
33-
; CHECK-NEXT: [[NEG_COMPARE:%.*]] = icmp sle i64 [[DIFF]], 0
34-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i1> poison, i1 [[NEG_COMPARE]], i64 0
35-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i1> [[DOTSPLATINSERT]], <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
36-
; CHECK-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[DIFF]])
37-
; CHECK-NEXT: [[TMP8:%.*]] = or <vscale x 16 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
3829
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
3930
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16
40-
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[TMP10]]
41-
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[N]], [[TMP10]]
31+
; CHECK-NEXT: [[B_PTR:%.*]] = inttoptr i64 [[B2]] to ptr
; CHECK-NEXT: [[C_PTR:%.*]] = inttoptr i64 [[C1]] to ptr
; CHECK-NEXT: [[ALIAS_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.experimental.loop.dependence.war.mask.nxv16i1(ptr [[B_PTR]], ptr [[C_PTR]], i64 1)
32+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
33+
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP8]], 16
34+
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[TMP15]]
35+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[N]], [[TMP15]]
4236
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 0
4337
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[N]])
4438
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4539
; CHECK: vector.body:
4640
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4741
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
4842
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
49-
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], [[TMP8]]
43+
; CHECK-NEXT: [[TMP25:%.*]] = and <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], [[ALIAS_LANE_MASK]]
5044
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
5145
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
52-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP17]], i32 1, <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i8> poison)
46+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP17]], i32 1, <vscale x 16 x i1> [[TMP25]], <vscale x 16 x i8> poison)
5347
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
5448
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0
55-
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP19]], i32 1, <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i8> poison)
49+
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP19]], i32 1, <vscale x 16 x i1> [[TMP25]], <vscale x 16 x i8> poison)
5650
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD5]], [[WIDE_MASKED_LOAD]]
5751
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP14]]
5852
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i32 0
59-
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP20]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[TMP15]])
60-
; CHECK-NEXT: [[TMP23:%.*]] = zext <vscale x 16 x i1> [[TMP8]] to <vscale x 16 x i8>
61-
; CHECK-NEXT: [[TMP24:%.*]] = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> [[TMP23]])
62-
; CHECK-NEXT: [[TMP25:%.*]] = zext i8 [[TMP24]] to i64
63-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP25]]
53+
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP20]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[TMP25]])
54+
; CHECK-NEXT: [[TMP28:%.*]] = zext <vscale x 16 x i1> [[ALIAS_LANE_MASK]] to <vscale x 16 x i8>
55+
; CHECK-NEXT: [[TMP23:%.*]] = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> [[TMP28]])
56+
; CHECK-NEXT: [[TMP24:%.*]] = zext i8 [[TMP23]] to i64
57+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP24]]
6458
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP13]])
6559
; CHECK-NEXT: [[TMP26:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
6660
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i32 0
@@ -91,10 +85,10 @@ define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n)
9185
; CHECK-LABEL: define i32 @alias_mask_read_after_write(
9286
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
9387
; CHECK-NEXT: entry:
94-
; CHECK-NEXT: [[C4:%.*]] = ptrtoint ptr [[C]] to i64
95-
; CHECK-NEXT: [[B3:%.*]] = ptrtoint ptr [[B]] to i64
9688
; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64
9789
; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
90+
; CHECK-NEXT: [[C3:%.*]] = ptrtoint ptr [[C]] to i64
91+
; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i64
9892
; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[N]], 0
9993
; CHECK-NEXT: br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
10094
; CHECK: for.body.preheader:
@@ -103,30 +97,23 @@ define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n)
10397
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
10498
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
10599
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
106-
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[C2]]
100+
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B2]], [[C3]]
107101
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
108102
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
109103
; CHECK: vector.ph:
110-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
111-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
112-
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
104+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
105+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP8]], 4
106+
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP7]], 1
113107
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
114-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
108+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
115109
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
116-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
117-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
118-
; CHECK-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[C4]], [[B3]]
119-
; CHECK-NEXT: [[SUB_ABS:%.*]] = call i64 @llvm.abs.i64(i64 [[SUB_DIFF]], i1 false)
120-
; CHECK-NEXT: [[DIFF:%.*]] = sdiv i64 [[SUB_ABS]], 0
121-
; CHECK-NEXT: [[NEG_COMPARE:%.*]] = icmp eq i64 [[DIFF]], 0
122-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[NEG_COMPARE]], i64 0
123-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i1> [[DOTSPLATINSERT]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
124-
; CHECK-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[DIFF]])
125-
; CHECK-NEXT: [[TMP9:%.*]] = or <vscale x 4 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
126110
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
127111
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
128-
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]]
129-
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
112+
; CHECK-NEXT: [[C_PTR:%.*]] = inttoptr i64 [[C2]] to ptr
; CHECK-NEXT: [[B_PTR:%.*]] = inttoptr i64 [[B1]] to ptr
; CHECK-NEXT: [[ALIAS_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.loop.dependence.raw.mask.nxv4i1(ptr [[C_PTR]], ptr [[B_PTR]], i64 4)
113+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
114+
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP9]], 4
115+
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP16]]
116+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP16]]
130117
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
131118
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
132119
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -135,23 +122,23 @@ define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n)
135122
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
136123
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
137124
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
138-
; CHECK-NEXT: [[TMP16:%.*]] = and <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], [[TMP9]]
125+
; CHECK-NEXT: [[TMP31:%.*]] = and <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], [[ALIAS_LANE_MASK]]
139126
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
140127
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
141-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 2, <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> poison)
128+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 2, <vscale x 4 x i1> [[TMP31]], <vscale x 4 x i32> poison)
142129
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP15]]
143130
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
144-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP20]], i32 2, <vscale x 4 x i1> [[TMP16]])
131+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP20]], i32 2, <vscale x 4 x i1> [[TMP31]])
145132
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]]
146133
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0
147-
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP22]], i32 2, <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> poison)
134+
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP22]], i32 2, <vscale x 4 x i1> [[TMP31]], <vscale x 4 x i32> poison)
148135
; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
149136
; CHECK-NEXT: [[TMP24:%.*]] = add <vscale x 4 x i32> [[TMP23]], [[WIDE_MASKED_LOAD5]]
150-
; CHECK-NEXT: [[TMP25]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i32> [[VEC_PHI]]
151-
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 4 x i1> [[TMP9]] to <vscale x 4 x i8>
152-
; CHECK-NEXT: [[TMP27:%.*]] = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> [[TMP26]])
153-
; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP27]] to i64
154-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP28]]
137+
; CHECK-NEXT: [[TMP25]] = select <vscale x 4 x i1> [[TMP31]], <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i32> [[VEC_PHI]]
138+
; CHECK-NEXT: [[TMP32:%.*]] = zext <vscale x 4 x i1> [[ALIAS_LANE_MASK]] to <vscale x 4 x i8>
139+
; CHECK-NEXT: [[TMP26:%.*]] = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> [[TMP32]])
140+
; CHECK-NEXT: [[TMP27:%.*]] = zext i8 [[TMP26]] to i64
141+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP27]]
155142
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]])
156143
; CHECK-NEXT: [[TMP29:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
157144
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <vscale x 4 x i1> [[TMP29]], i32 0

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
121121
; PRED-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT7]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
122122
; PRED-NEXT: [[TMP9:%.*]] = extractelement <vscale x 16 x i64> [[BROADCAST_SPLAT6]], i32 0
123123
; PRED-NEXT: [[TMP10:%.*]] = extractelement <vscale x 16 x i64> [[BROADCAST_SPLAT]], i32 0
124-
; PRED-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[TMP10]], [[TMP9]]
125-
; PRED-NEXT: [[DIFF:%.*]] = sdiv i64 [[SUB_DIFF]], 0
126-
; PRED-NEXT: [[NEG_COMPARE:%.*]] = icmp sle i64 [[DIFF]], 0
127-
; PRED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i1> poison, i1 [[NEG_COMPARE]], i64 0
128-
; PRED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i1> [[DOTSPLATINSERT]], <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
129-
; PRED-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[DIFF]])
130-
; PRED-NEXT: [[TMP11:%.*]] = or <vscale x 16 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
124+
; PRED-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP10]] to ptr
125+
; PRED-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP9]] to ptr
126+
; PRED-NEXT: [[TMP11:%.*]] = call <vscale x 16 x i1> @llvm.experimental.loop.dependence.war.mask.nxv16i1(ptr [[TMP19]], ptr [[TMP34]], i64 1)
131127
; PRED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
132128
; PRED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
133129
; PRED-NEXT: [[TMP14:%.*]] = sub i64 [[TMP0]], [[TMP13]]

0 commit comments

Comments
 (0)