1
- ; REQUIRES: asserts
2
- ; RUN: not --crash opt -p loop-vectorize -mtriple=s390x-unknown-linux -mcpu=z16 %s
1
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2
+ ; RUN: opt -p loop-vectorize -mtriple=s390x-unknown-linux -mcpu=z16 -S %s | FileCheck %s
3
3
4
4
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
5
5
@@ -9,6 +9,94 @@ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
9
9
; all scalar lanes) and a VPInstruction that only demands the first lane.
10
10
; Test case for https://github.com/llvm/llvm-project/issues/88849.
11
11
define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst (ptr noalias %dst , ptr noalias %src.1 ) {
12
+ ; CHECK-LABEL: define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(
13
+ ; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
14
+ ; CHECK-NEXT: [[ENTRY:.*]]:
15
+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16
+ ; CHECK: [[VECTOR_PH]]:
17
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
18
+ ; CHECK: [[VECTOR_BODY]]:
19
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
20
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
21
+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
22
+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
23
+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
24
+ ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 [[TMP0]], 4
25
+ ; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[TMP1]], 4
26
+ ; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 [[TMP2]], 4
27
+ ; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[TMP3]], 4
28
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
29
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP5]]
30
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP6]]
31
+ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP7]]
32
+ ; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
33
+ ; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
34
+ ; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
35
+ ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1
36
+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
37
+ ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 1
38
+ ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 2
39
+ ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP18]], i8 [[TMP15]], i32 3
40
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
41
+ ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP0]], 4
42
+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 [[TMP21]]
43
+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0
44
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
45
+ ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
46
+ ; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
47
+ ; CHECK: [[PRED_STORE_IF]]:
48
+ ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 0
49
+ ; CHECK-NEXT: store i32 [[TMP25]], ptr [[DST]], align 4
50
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
51
+ ; CHECK: [[PRED_STORE_CONTINUE]]:
52
+ ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
53
+ ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
54
+ ; CHECK: [[PRED_STORE_IF1]]:
55
+ ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 1
56
+ ; CHECK-NEXT: store i32 [[TMP27]], ptr [[DST]], align 4
57
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
58
+ ; CHECK: [[PRED_STORE_CONTINUE2]]:
59
+ ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
60
+ ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
61
+ ; CHECK: [[PRED_STORE_IF3]]:
62
+ ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
63
+ ; CHECK-NEXT: store i32 [[TMP29]], ptr [[DST]], align 4
64
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
65
+ ; CHECK: [[PRED_STORE_CONTINUE4]]:
66
+ ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
67
+ ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
68
+ ; CHECK: [[PRED_STORE_IF5]]:
69
+ ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
70
+ ; CHECK-NEXT: store i32 [[TMP31]], ptr [[DST]], align 4
71
+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
72
+ ; CHECK: [[PRED_STORE_CONTINUE6]]:
73
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
74
+ ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
75
+ ; CHECK: [[MIDDLE_BLOCK]]:
76
+ ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
77
+ ; CHECK: [[SCALAR_PH]]:
78
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
79
+ ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
80
+ ; CHECK: [[LOOP_HEADER]]:
81
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
82
+ ; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4
83
+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]]
84
+ ; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
85
+ ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_1]], 0
86
+ ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
87
+ ; CHECK: [[THEN]]:
88
+ ; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV]], 4
89
+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr @src, i64 0, i64 [[IV_OR]]
90
+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4
91
+ ; CHECK-NEXT: store i32 [[L_2]], ptr [[DST]], align 4
92
+ ; CHECK-NEXT: br label %[[LOOP_LATCH]]
93
+ ; CHECK: [[LOOP_LATCH]]:
94
+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
95
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4
96
+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
97
+ ; CHECK: [[EXIT]]:
98
+ ; CHECK-NEXT: ret void
99
+ ;
12
100
entry:
13
101
br label %loop.header
14
102
@@ -35,3 +123,9 @@ loop.latch:
35
123
exit:
36
124
ret void
37
125
}
126
+ ;.
127
+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
128
+ ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
129
+ ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
130
+ ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
131
+ ;.
0 commit comments