Skip to content

Commit b8741cc

Browse files
committed
[VPlan] Relax assertion retrieving a scalar from VPTransformState::get.
The current assertion in VPTransformState::get for retrieving a single scalar does not account for cases where a def has multiple users, some demanding all scalar lanes and some demanding only a single scalar. For an example, see the modified test case. Relax the assertion by also allowing a single scalar to be requested when the Def does not have only its first lane used. Fixes #88849.
1 parent 6b98ab9 commit b8741cc

File tree

2 files changed

+97
-2
lines changed

2 files changed

+97
-2
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
254254
Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
255255
if (NeedsScalar) {
256256
assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def, Part) ||
257+
!vputils::onlyFirstLaneUsed(Def) ||
257258
(hasScalarValue(Def, VPIteration(Part, 0)) &&
258259
Data.PerPartScalars[Def][Part].size() == 1)) &&
259260
"Trying to access a single scalar per part but has multiple scalars "

llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; REQUIRES: asserts
2-
; RUN: not --crash opt -p loop-vectorize -mtriple=s390x-unknown-linux -mcpu=z16 %s
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p loop-vectorize -mtriple=s390x-unknown-linux -mcpu=z16 -S %s | FileCheck %s
33

44
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
55

@@ -9,6 +9,94 @@ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
99
; all scalar lanes) and a VPInstruction that only demands the first lane.
1010
; Test case for https://github.com/llvm/llvm-project/issues/88849.
1111
define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(ptr noalias %dst, ptr noalias %src.1) {
12+
; CHECK-LABEL: define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(
13+
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
14+
; CHECK-NEXT: [[ENTRY:.*]]:
15+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16+
; CHECK: [[VECTOR_PH]]:
17+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
18+
; CHECK: [[VECTOR_BODY]]:
19+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
20+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
21+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
22+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
23+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
24+
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 [[TMP0]], 4
25+
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[TMP1]], 4
26+
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 [[TMP2]], 4
27+
; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[TMP3]], 4
28+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
29+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP5]]
30+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP6]]
31+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP7]]
32+
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
33+
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
34+
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
35+
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1
36+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
37+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 1
38+
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 2
39+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP18]], i8 [[TMP15]], i32 3
40+
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
41+
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP0]], 4
42+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 [[TMP21]]
43+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0
44+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
45+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
46+
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
47+
; CHECK: [[PRED_STORE_IF]]:
48+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 0
49+
; CHECK-NEXT: store i32 [[TMP25]], ptr [[DST]], align 4
50+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
51+
; CHECK: [[PRED_STORE_CONTINUE]]:
52+
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
53+
; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
54+
; CHECK: [[PRED_STORE_IF1]]:
55+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 1
56+
; CHECK-NEXT: store i32 [[TMP27]], ptr [[DST]], align 4
57+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
58+
; CHECK: [[PRED_STORE_CONTINUE2]]:
59+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
60+
; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
61+
; CHECK: [[PRED_STORE_IF3]]:
62+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
63+
; CHECK-NEXT: store i32 [[TMP29]], ptr [[DST]], align 4
64+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
65+
; CHECK: [[PRED_STORE_CONTINUE4]]:
66+
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
67+
; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
68+
; CHECK: [[PRED_STORE_IF5]]:
69+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
70+
; CHECK-NEXT: store i32 [[TMP31]], ptr [[DST]], align 4
71+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
72+
; CHECK: [[PRED_STORE_CONTINUE6]]:
73+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
74+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
75+
; CHECK: [[MIDDLE_BLOCK]]:
76+
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
77+
; CHECK: [[SCALAR_PH]]:
78+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
79+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
80+
; CHECK: [[LOOP_HEADER]]:
81+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
82+
; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4
83+
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]]
84+
; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
85+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_1]], 0
86+
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
87+
; CHECK: [[THEN]]:
88+
; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV]], 4
89+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr @src, i64 0, i64 [[IV_OR]]
90+
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4
91+
; CHECK-NEXT: store i32 [[L_2]], ptr [[DST]], align 4
92+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
93+
; CHECK: [[LOOP_LATCH]]:
94+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
95+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4
96+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
97+
; CHECK: [[EXIT]]:
98+
; CHECK-NEXT: ret void
99+
;
12100
entry:
13101
br label %loop.header
14102

@@ -35,3 +123,9 @@ loop.latch:
35123
exit:
36124
ret void
37125
}
126+
;.
127+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
128+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
129+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
130+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
131+
;.

0 commit comments

Comments
 (0)