@@ -32,14 +32,14 @@ define i32 @test_select(ptr noalias nocapture readonly %blk1, ptr noalias nocapt
32
32
; CHECK-NEXT: [[J_025:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
33
33
; CHECK-NEXT: [[P2_024:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ]
34
34
; CHECK-NEXT: [[P1_023:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
35
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32>, ptr [[P1_023]], align 4
36
- ; CHECK-NEXT: [[TMP3 :%.*]] = load <4 x i32>, ptr [[P2_024]], align 4
37
- ; CHECK-NEXT: [[TMP4 :%.*]] = sub nsw <4 x i32> [[TMP1 ]], [[TMP3 ]]
38
- ; CHECK-NEXT: [[TMP5 :%.*]] = icmp slt <4 x i32> [[TMP4 ]], zeroinitializer
39
- ; CHECK-NEXT: [[TMP6 :%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4 ]]
40
- ; CHECK-NEXT: [[TMP7 :%.*]] = select <4 x i1> [[TMP5 ]], <4 x i32> [[TMP6 ]], <4 x i32> [[TMP4 ]]
41
- ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7 ]])
42
- ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8 ]], [[S_026]]
35
+ ; CHECK-NEXT: [[TMP0 :%.*]] = load <4 x i32>, ptr [[P1_023]], align 4
36
+ ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32>, ptr [[P2_024]], align 4
37
+ ; CHECK-NEXT: [[TMP2 :%.*]] = sub nsw <4 x i32> [[TMP0 ]], [[TMP1 ]]
38
+ ; CHECK-NEXT: [[TMP3 :%.*]] = icmp slt <4 x i32> [[TMP2 ]], zeroinitializer
39
+ ; CHECK-NEXT: [[TMP4 :%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP2 ]]
40
+ ; CHECK-NEXT: [[TMP5 :%.*]] = select <4 x i1> [[TMP3 ]], <4 x i32> [[TMP4 ]], <4 x i32> [[TMP2 ]]
41
+ ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5 ]])
42
+ ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP6 ]], [[S_026]]
43
43
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[P1_023]], i64 [[IDX_EXT]]
44
44
; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, ptr [[P2_024]], i64 [[IDX_EXT]]
45
45
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1
@@ -150,11 +150,11 @@ define i32 @reduction_with_br(ptr noalias nocapture readonly %blk1, ptr noalias
150
150
; CHECK-NEXT: [[J_019:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
151
151
; CHECK-NEXT: [[P2_018:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ]
152
152
; CHECK-NEXT: [[P1_017:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ]
153
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32>, ptr [[P1_017]], align 4
154
- ; CHECK-NEXT: [[TMP3 :%.*]] = load <4 x i32>, ptr [[P2_018]], align 4
155
- ; CHECK-NEXT: [[TMP4 :%.*]] = mul nsw <4 x i32> [[TMP3 ]], [[TMP1 ]]
156
- ; CHECK-NEXT: [[TMP5 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4 ]])
157
- ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP5 ]], [[S_020]]
153
+ ; CHECK-NEXT: [[TMP0 :%.*]] = load <4 x i32>, ptr [[P1_017]], align 4
154
+ ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32>, ptr [[P2_018]], align 4
155
+ ; CHECK-NEXT: [[TMP2 :%.*]] = mul nsw <4 x i32> [[TMP1 ]], [[TMP0 ]]
156
+ ; CHECK-NEXT: [[TMP3 :%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2 ]])
157
+ ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP3 ]], [[S_020]]
158
158
; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
159
159
; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
160
160
; CHECK: if.end:
@@ -245,16 +245,16 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali
245
245
; CHECK-NEXT: [[J_046:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END_86]] ]
246
246
; CHECK-NEXT: [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
247
247
; CHECK-NEXT: [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
248
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
249
- ; CHECK-NEXT: [[TMP2 :%.*]] = zext <8 x i8> [[TMP1 ]] to <8 x i32>
250
- ; CHECK-NEXT: [[TMP4 :%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
251
- ; CHECK-NEXT: [[TMP5 :%.*]] = zext <8 x i8> [[TMP4 ]] to <8 x i32>
252
- ; CHECK-NEXT: [[TMP6 :%.*]] = sub nsw <8 x i32> [[TMP2 ]], [[TMP5 ]]
253
- ; CHECK-NEXT: [[TMP7 :%.*]] = icmp slt <8 x i32> [[TMP6 ]], zeroinitializer
254
- ; CHECK-NEXT: [[TMP8 :%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6 ]]
255
- ; CHECK-NEXT: [[TMP9 :%.*]] = select <8 x i1> [[TMP7 ]], <8 x i32> [[TMP8 ]], <8 x i32> [[TMP6 ]]
256
- ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9 ]])
257
- ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10 ]], [[S_047]]
248
+ ; CHECK-NEXT: [[TMP0 :%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
249
+ ; CHECK-NEXT: [[TMP1 :%.*]] = zext <8 x i8> [[TMP0 ]] to <8 x i32>
250
+ ; CHECK-NEXT: [[TMP2 :%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
251
+ ; CHECK-NEXT: [[TMP3 :%.*]] = zext <8 x i8> [[TMP2 ]] to <8 x i32>
252
+ ; CHECK-NEXT: [[TMP4 :%.*]] = sub nsw <8 x i32> [[TMP1 ]], [[TMP3 ]]
253
+ ; CHECK-NEXT: [[TMP5 :%.*]] = icmp slt <8 x i32> [[TMP4 ]], zeroinitializer
254
+ ; CHECK-NEXT: [[TMP6 :%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP4 ]]
255
+ ; CHECK-NEXT: [[TMP7 :%.*]] = select <8 x i1> [[TMP5 ]], <8 x i32> [[TMP6 ]], <8 x i32> [[TMP4 ]]
256
+ ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7 ]])
257
+ ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8 ]], [[S_047]]
258
258
; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
259
259
; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
260
260
; CHECK: if.end.86:
0 commit comments