Commit 36c68a2
[VPlan] Use pointer to member 0 as VPInterleaveRecipe's pointer arg.
1 parent aae7ac6 commit 36c68a2

17 files changed: +140 -207 lines

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 8 additions & 2 deletions
@@ -220,9 +220,15 @@ class VPBuilder {
         new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
   }
 
-  VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL,
+  VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
                               const Twine &Name = "") {
-    return createInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, DL, Name);
+    return tryInsertInstruction(new VPInstruction(
+        Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(false), DL, Name));
+  }
+  VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+                                const Twine &Name = "") {
+    return tryInsertInstruction(new VPInstruction(
+        Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name));
   }
 
   VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
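For illustration only (not part of the commit), a minimal sketch of how a transform might use the two builder helpers. Base, Off and InsertPos are assumed names: Base and Off would be VPValues already defined in the plan, and InsertPos an existing recipe used as the insertion point, as in the VPlanTransforms.cpp change below.

    VPBuilder Builder(InsertPos);
    // PtrAdd emitted without the inbounds flag.
    VPValue *Addr = Builder.createPtrAdd(Base, Off);
    // Inbounds variant, used when the source GEP was known to be inbounds.
    VPValue *StartAddr = Builder.createInBoundsPtrAdd(Base, Off);

Both helpers now build a dedicated PtrAdd VPInstruction carrying a GEPFlagsTy, rather than going through createInstruction.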

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
@@ -9005,8 +9005,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
-  VPlanTransforms::createInterleaveGroups(InterleaveGroups, RecipeBuilder,
-                                          CM.isScalarEpilogueAllowed());
+  VPlanTransforms::createInterleaveGroups(
+      *Plan, InterleaveGroups, RecipeBuilder, CM.isScalarEpilogueAllowed());
 
   for (ElementCount VF : Range)
     Plan->addVF(VF);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 1 deletion
@@ -956,7 +956,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
     DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
   };
 
-protected:
   struct GEPFlagsTy {
     char IsInBounds : 1;
     GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
@@ -1307,6 +1306,13 @@ class VPInstruction : public VPRecipeWithIRFlags,
     assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
   }
 
+  VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags = {false},
+                DebugLoc DL = {}, const Twine &Name = "")
+      : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
+                            ArrayRef<VPValue *>({Ptr, Offset}),
+                            GEPFlagsTy(Flags), DL),
+        Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
+
   VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
 
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 25 deletions
@@ -644,7 +644,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
            "can only generate first lane for PtrAdd");
     Value *Ptr = State.get(getOperand(0), /* IsScalar */ true);
     Value *Addend = State.get(getOperand(1), /* IsScalar */ true);
-    return Builder.CreatePtrAdd(Ptr, Addend, Name);
+    return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name)
+                        : Builder.CreatePtrAdd(Ptr, Addend, Name);
   }
   case VPInstruction::ResumePhi: {
     Value *IncomingFromVPlanPred =
@@ -2470,51 +2471,37 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   unsigned InterleaveFactor = Group->getFactor();
   auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
 
-  // Prepare for the new pointers.
-  unsigned Index = Group->getIndex(Instr);
-
   // TODO: extend the masked interleaved-group support to reversed access.
   VPValue *BlockInMask = getMask();
   assert((!BlockInMask || !Group->isReverse()) &&
          "Reversed masked interleave-group not supported.");
 
-  Value *Idx;
+  Value *Index;
   // If the group is reverse, adjust the index to refer to the last vector lane
   // instead of the first. We adjust the index from the first vector lane,
   // rather than directly getting the pointer for lane VF - 1, because the
   // pointer operand of the interleaved access is supposed to be uniform.
   if (Group->isReverse()) {
     Value *RuntimeVF =
         getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
-    Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
-    Idx = State.Builder.CreateMul(Idx,
-                                  State.Builder.getInt32(Group->getFactor()));
-    Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index));
-    Idx = State.Builder.CreateNeg(Idx);
-  } else
-    Idx = State.Builder.getInt32(-Index);
+    Index = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
+    Index = State.Builder.CreateMul(Index,
+                                    State.Builder.getInt32(Group->getFactor()));
+    Index = State.Builder.CreateNeg(Index);
+  } else {
+    // TODO: Drop redundant 0-index GEP as follow-up.
+    Index = State.Builder.getInt32(0);
+  }
 
   VPValue *Addr = getAddr();
   Value *ResAddr = State.get(Addr, VPLane(0));
   if (auto *I = dyn_cast<Instruction>(ResAddr))
     State.setDebugLocFrom(I->getDebugLoc());
 
-  // Notice current instruction could be any index. Need to adjust the address
-  // to the member of index 0.
-  //
-  // E.g. a = A[i+1]; // Member of index 1 (Current instruction)
-  //      b = A[i];   // Member of index 0
-  // Current pointer is pointed to A[i+1], adjust it to A[i].
-  //
-  // E.g. A[i+1] = a; // Member of index 1
-  //      A[i]   = b; // Member of index 0
-  //      A[i+2] = c; // Member of index 2 (Current instruction)
-  // Current pointer is pointed to A[i+2], adjust it to A[i].
-
   bool InBounds = false;
   if (auto *gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
     InBounds = gep->isInBounds();
-  ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Idx, "", InBounds);
+  ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
 
   State.setDebugLocFrom(Instr->getDebugLoc());
   Value *PoisonVec = PoisonValue::get(VecTy);
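For illustration only (not from the commit): with the address operand now pointing at member 0 of the group, the scalar index fed into the final GEP in VPInterleaveRecipe::execute reduces to the computation sketched below. The function name is hypothetical; RuntimeVF is the runtime vector length and Factor the interleave factor.

    // Illustrative sketch of the index computation after this change.
    long interleaveGroupIndex(bool IsReverse, long RuntimeVF, long Factor) {
      if (IsReverse)
        // Step back to the group element of the last vector lane,
        // e.g. RuntimeVF = 4, Factor = 2  ->  -((4 - 1) * 2) = -6.
        return -((RuntimeVF - 1) * Factor);
      // Forward access: the address already points at member 0, so only a
      // redundant 0-index GEP remains (flagged by the TODO above).
      return 0;
    }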

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 40 additions & 6 deletions
@@ -1590,14 +1590,19 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
 }
 
 void VPlanTransforms::createInterleaveGroups(
-    const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> &InterleaveGroups,
+    VPlan &Plan,
+    const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
+        &InterleaveGroups,
     VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed) {
+  if (InterleaveGroups.empty())
+    return;
+
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
+  VPDominatorTree VPDT;
+  VPDT.recalculate(Plan);
   for (const auto *IG : InterleaveGroups) {
-    auto *Recipe =
-        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
     SmallVector<VPValue *, 4> StoredValues;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
@@ -1607,9 +1612,38 @@ void VPlanTransforms::createInterleaveGroups(
 
     bool NeedsMaskForGaps =
         IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed;
-    auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,
-                                        Recipe->getMask(), NeedsMaskForGaps);
-    VPIG->insertBefore(Recipe);
+
+    Instruction *IRInsertPos = IG->getInsertPos();
+    auto *InsertPos =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
+
+    // Get or create the start address for the interleave group.
+    auto *Start =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
+    VPValue *Addr = Start->getAddr();
+    if (!VPDT.properlyDominates(Addr->getDefiningRecipe(), InsertPos)) {
+      bool InBounds = false;
+      if (auto *Gep = dyn_cast<GetElementPtrInst>(
+              getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
+        InBounds = Gep->isInBounds();
+
+      // We cannot re-use the address of the first member because it does not
+      // dominate the insert position. Use the address of the insert position
+      // and create a PtrAdd to adjust the index to start at the first member.
+      APInt Offset(32,
+                   getLoadStoreType(IRInsertPos)->getScalarSizeInBits() / 8 *
+                       IG->getIndex(IRInsertPos),
+                   /*IsSigned=*/true);
+      VPValue *OffsetVPV = Plan.getOrAddLiveIn(
+          ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
+      VPBuilder B(InsertPos);
+      Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
+                      : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
+    }
+    auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
+                                        InsertPos->getMask(), NeedsMaskForGaps);
+    VPIG->insertBefore(InsertPos);
+
     unsigned J = 0;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (Instruction *Member = IG->getMember(i)) {
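A worked example (illustrative values, not from the commit) of the offset computed above when the first member's address does not dominate the insert position: the PtrAdd steps the insert position's address back to member 0 by that member's byte offset within the group.

    // Assume i32 members (4 bytes) and the insert position at member index 2.
    unsigned ScalarSizeInBytes = 32 / 8;                  // getScalarSizeInBits() / 8
    unsigned MemberIndex = 2;                             // IG->getIndex(IRInsertPos)
    long ByteOffset = ScalarSizeInBytes * MemberIndex;    // 8
    long PtrAddOffset = -ByteOffset;                      // -8: back to member 0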

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 1 deletion
@@ -114,7 +114,9 @@ struct VPlanTransforms {
   // widening its memory instructions with a single VPInterleaveRecipe at its
   // insertion point.
   static void createInterleaveGroups(
-      const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> &InterleaveGroups,
+      VPlan &Plan,
+      const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
+          &InterleaveGroups,
       VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed);
 
   /// Remove dead recipes from \p Plan.

llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll

Lines changed: 1 addition & 2 deletions
@@ -14,8 +14,7 @@ define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 11 additions & 19 deletions
@@ -41,13 +41,11 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <vscale x 4 x i32> [[TMP4]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 -4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -121,20 +119,19 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.+]] = shl i64 [[INDEX]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
 ; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[TMP7]]
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
 ; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i32> [[BROADCAST_SPLAT]], [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER1]] to <vscale x 4 x i32>
 ; CHECK-NEXT: [[TMP12:%.*]] = mul nsw <vscale x 4 x i32> [[BROADCAST_SPLAT3]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i64> [[TMP7]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -4
 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP12]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
@@ -404,10 +401,10 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT: [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP11]])
 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE1]], [[VEC_IND]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 0
 ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 1, [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 2, [[TMP16]]
 ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP18]]
 ; CHECK-NEXT: [[REVERSE2:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP12]])
@@ -715,16 +712,14 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <vscale x 4 x i32> [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 1
 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <vscale x 4 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 -4
 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP9]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 4
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
@@ -1271,14 +1266,11 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -4
+; CHECK-NEXT: [[P:%.+]] = extractelement <vscale x 4 x ptr> [[TMP13]], i64 0
 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> [[BROADCAST_SPLAT4]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[P]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
