@@ -20,24 +20,32 @@ define void @pr75298_store_reduction_value_in_folded_loop(i64 %iv.start) optsize
20
20
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
21
21
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
22
22
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]]
23
+ ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
24
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
25
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
23
26
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
24
27
; CHECK: vector.body:
25
28
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
26
- ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
27
- ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @c, align 4
28
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
29
- ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
30
- ; CHECK-NEXT: [[TMP2]] = xor <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]]
29
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
30
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
31
+ ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
32
+ ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
33
+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
34
+ ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @c, align 4
35
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
36
+ ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
37
+ ; CHECK-NEXT: [[TMP3]] = xor <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT4]]
38
+ ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
31
39
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
32
- ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
33
- ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
40
+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
41
+ ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
34
42
; CHECK: middle.block:
35
- ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP2]])
36
- ; CHECK-NEXT: store i32 [[TMP4]], ptr @a, align 4
43
+ ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
44
+ ; CHECK-NEXT: store i32 [[TMP6]], ptr @a, align 4
37
45
; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
38
46
; CHECK: scalar.ph:
39
47
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[PH]] ]
40
- ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
48
+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
41
49
; CHECK-NEXT: br label [[LOOP:%.*]]
42
50
; CHECK: loop:
43
51
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
0 commit comments