Skip to content

Commit 464286b

Browse files
committed
[VPlan] Don't narrow interleave groups if there are vector pointers.
Do not narrow interleave groups if there are VectorPointer recipes and the plan was unrolled. The recipe implicitly uses VF from VPTransformState.
1 parent 768ccf6 commit 464286b

File tree

2 files changed

+26
-12
lines changed

2 files changed

+26
-12
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2590,6 +2590,14 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
25902590
if (R.mayWriteToMemory() && !InterleaveR)
25912591
return;
25922592

2593+
// Do not narrow interleave groups if there are VectorPointer recipes and
2594+
// the plan was unrolled. The recipe implicitly uses VF from
2595+
// VPTransformState.
2596+
// TODO: Remove restriction once the VF for the VectorPointer offset is
2597+
// modeled explicitly as operand.
2598+
if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
2599+
return;
2600+
25932601
// All other ops are allowed, but we reject uses that cannot be converted
25942602
// when checking all allowed consumers (store interleave groups) below.
25952603
if (!InterleaveR)

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -76,27 +76,33 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) {
7676
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7777
; CHECK: [[VECTOR_BODY]]:
7878
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
79-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
79+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2
8080
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
8181
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
8282
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2
83-
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 8
84-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
85-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
86-
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
87-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
88-
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
83+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
84+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
8985
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1
9086
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP0]], 1
9187
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
9288
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP7]]
93-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
94-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
89+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
90+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
91+
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
92+
; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
93+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC3]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
94+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <4 x i64> [[WIDE_VEC3]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
9595
; CHECK-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
9696
; CHECK-NEXT: [[TMP11:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT3]], [[WIDE_LOAD1]]
97-
; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP8]], align 8
98-
; CHECK-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP9]], align 8
99-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
97+
; CHECK-NEXT: [[TMP15:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT]], [[STRIDED_VEC2]]
98+
; CHECK-NEXT: [[TMP16:%.*]] = mul <2 x i64> [[BROADCAST_SPLAT3]], [[STRIDED_VEC5]]
99+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
100+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
101+
; CHECK-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
102+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
103+
; CHECK-NEXT: [[INTERLEAVED_VEC6:%.*]] = shufflevector <4 x i64> [[TMP18]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
104+
; CHECK-NEXT: store <4 x i64> [[INTERLEAVED_VEC6]], ptr [[TMP9]], align 8
105+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
100106
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
101107
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
102108
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)