Skip to content

Commit b79c14c

Browse files
committed
!fixup finalize comments
1 parent 315de55 commit b79c14c

File tree

2 files changed

+34
-29
lines changed

2 files changed

+34
-29
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2236,19 +2236,26 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
22362236
}
22372237
}
22382238

2239-
/// Returns true if \p V used at \p Idx of a wide recipe is defined by a
2240-
/// VPWidenLoadRecipe or VPInterleaveRecipe that can be converted to a narrower
2241-
/// recipe. A VPWidenLoadRecipe can be narrowed to an index independent load if
2242-
/// it feeds all the wide ops at all indices (checked by via the operands of the
2243-
/// wide recipe at lane0, \p R0). A VPInterleaveRecipe can be narrowed to a wide
2244-
/// load, if \p V used at index \p Idx are defined at \p Idx of the interleave
2245-
/// group.
2246-
static bool canNarrowLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
2239+
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
2240+
/// converted to a narrower recipe. \p V is used by a wide recipe \p WideMember
2241+
/// that feeds a store interleave group at index \p Idx, \p WideMember0 is the
2242+
/// recipe feeding the same interleave group at index 0. A VPWidenLoadRecipe can
2243+
/// be narrowed to an index-independent load if it feeds all wide ops at all
2244+
/// indices (checked via the operands of the wide recipe at lane0, \p
2245+
/// WideMember0). A VPInterleaveRecipe can be narrowed to a wide load, if \p V
2246+
/// is defined at \p Idx of a load interleave group.
2247+
static bool canNarrowLoad(VPWidenRecipe *WideMember0, VPWidenRecipe *WideMember,
2248+
VPValue *V, unsigned Idx) {
22472249
auto *DefR = V->getDefiningRecipe();
22482250
if (!DefR)
22492251
return false;
22502252
if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
2251-
return !W->getMask() && is_contained(R0->operands(), V);
2253+
return !W->getMask() &&
2254+
all_of(zip(WideMember0->operands(), WideMember->operands()),
2255+
[V](const auto P) {
2256+
const auto &[WideMember0Op, WideMemberOp] = P;
2257+
return (WideMember0Op == V) == (WideMemberOp == V);
2258+
});
22522259

22532260
if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR))
22542261
return IR->getInterleaveGroup()->getFactor() ==
@@ -2360,8 +2367,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
23602367
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
23612368
R->getNumOperands() > 2)
23622369
return;
2363-
if (any_of(R->operands(), [WideMember0, Idx = I](VPValue *V) {
2364-
return !canNarrowLoad(WideMember0, V, Idx);
2370+
if (any_of(R->operands(), [WideMember0, Idx = I, R](VPValue *V) {
2371+
return !canNarrowLoad(WideMember0, R, V, Idx);
23652372
}))
23662373
return;
23672374
}

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,9 @@ define void @test_2xi64(ptr noalias %data, ptr noalias %factor) {
195195
; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
196196
; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1
197197
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
198-
; VF2-NEXT: [[TMP23:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
199-
; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP23]]
200-
; VF2-NEXT: store <2 x i64> [[TMP24]], ptr [[TMP7]], align 8
198+
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
199+
; VF2-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
200+
; VF2-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP7]], align 8
201201
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
202202
; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
203203
; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -1099,30 +1099,28 @@ define void @test_2xi64_sub_of_wide_loads_ops_swapped(ptr noalias %data, ptr noa
10991099
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
11001100
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
11011101
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
1102-
; VF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
1103-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
1104-
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
1105-
; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
1106-
; VF2-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
1107-
; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
1102+
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
1103+
; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
11081104
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
11091105
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
11101106
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 2
1111-
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
1112-
; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0
1113-
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
1114-
; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
1115-
; VF2-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i64 0
1116-
; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT5]], <2 x i64> poison, <2 x i32> zeroinitializer
1107+
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
1108+
; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
11171109
; VF2-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
11181110
; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
11191111
; VF2-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP0]], 1
11201112
; VF2-NEXT: [[TMP20:%.*]] = shl nsw i64 [[TMP1]], 1
11211113
; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]]
11221114
; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]]
1123-
; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[DATA_0]], align 8
1124-
; VF2-NEXT: store <2 x i64> [[TMP13]], ptr [[DATA_1]], align 8
1125-
; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 2
1115+
; VF2-NEXT: [[TMP14:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]]
1116+
; VF2-NEXT: [[TMP15:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]]
1117+
; VF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1118+
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP16]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1119+
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA_0]], align 8
1120+
; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1121+
; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1122+
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC4]], ptr [[DATA_1]], align 8
1123+
; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 4
11261124
; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
11271125
; VF2-NEXT: br i1 [[EC]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
11281126
; VF2: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)