@@ -84,31 +84,30 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst
84
84
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 2
85
85
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 2
86
86
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
87
- ; CHECK-NEXT: [[TMP6:%.*]] = or i1 false, [[TMP5]]
88
- ; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
87
+ ; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
89
88
; CHECK: vector.ph:
90
89
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
91
90
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
92
91
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
93
92
; CHECK: vector.body:
94
93
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
95
- ; CHECK-NEXT: [[TMP7 :%.*]] = add i32 [[INDEX]], 0
96
- ; CHECK-NEXT: [[TMP8 :%.*]] = mul nuw nsw i32 [[TMP7 ]], -1
97
- ; CHECK-NEXT: [[TMP9 :%.*]] = add nuw nsw i32 [[TMP8 ]], 2
98
- ; CHECK-NEXT: [[TMP10 :%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP9 ]]
99
- ; CHECK-NEXT: [[TMP11 :%.*]] = getelementptr inbounds i32, i32* [[TMP10 ]], i32 0
100
- ; CHECK-NEXT: [[TMP12 :%.*]] = getelementptr inbounds i32, i32* [[TMP11 ]], i32 -3
101
- ; CHECK-NEXT: [[TMP13 :%.*]] = bitcast i32* [[TMP12 ]] to <4 x i32>*
102
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13 ]], align 4
94
+ ; CHECK-NEXT: [[TMP6 :%.*]] = add i32 [[INDEX]], 0
95
+ ; CHECK-NEXT: [[TMP7 :%.*]] = mul nuw nsw i32 [[TMP6 ]], -1
96
+ ; CHECK-NEXT: [[TMP8 :%.*]] = add nuw nsw i32 [[TMP7 ]], 2
97
+ ; CHECK-NEXT: [[TMP9 :%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP8 ]]
98
+ ; CHECK-NEXT: [[TMP10 :%.*]] = getelementptr inbounds i32, i32* [[TMP9 ]], i32 0
99
+ ; CHECK-NEXT: [[TMP11 :%.*]] = getelementptr inbounds i32, i32* [[TMP10 ]], i32 -3
100
+ ; CHECK-NEXT: [[TMP12 :%.*]] = bitcast i32* [[TMP11 ]] to <4 x i32>*
101
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12 ]], align 4
103
102
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
104
- ; CHECK-NEXT: [[TMP14 :%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[REVERSE]]
105
- ; CHECK-NEXT: [[TMP15 :%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP7 ]]
106
- ; CHECK-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, i32* [[TMP15 ]], i32 0
107
- ; CHECK-NEXT: [[TMP17 :%.*]] = bitcast i32* [[TMP16 ]] to <4 x i32>*
108
- ; CHECK-NEXT: store <4 x i32> [[TMP14 ]], <4 x i32>* [[TMP17 ]], align 4
103
+ ; CHECK-NEXT: [[TMP13 :%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[REVERSE]]
104
+ ; CHECK-NEXT: [[TMP14 :%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP6 ]]
105
+ ; CHECK-NEXT: [[TMP15 :%.*]] = getelementptr inbounds i32, i32* [[TMP14 ]], i32 0
106
+ ; CHECK-NEXT: [[TMP16 :%.*]] = bitcast i32* [[TMP15 ]] to <4 x i32>*
107
+ ; CHECK-NEXT: store <4 x i32> [[TMP13 ]], <4 x i32>* [[TMP16 ]], align 4
109
108
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
110
- ; CHECK-NEXT: [[TMP18 :%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
111
- ; CHECK-NEXT: br i1 [[TMP18 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
109
+ ; CHECK-NEXT: [[TMP17 :%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
110
+ ; CHECK-NEXT: br i1 [[TMP17 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
112
111
; CHECK: middle.block:
113
112
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
114
113
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
@@ -120,8 +119,8 @@ define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst
120
119
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], -1
121
120
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
122
121
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
123
- ; CHECK-NEXT: [[TMP19 :%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
124
- ; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP19 ]]
122
+ ; CHECK-NEXT: [[TMP18 :%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
123
+ ; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP18 ]]
125
124
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
126
125
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
127
126
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
@@ -358,8 +357,7 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt
358
357
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
359
358
; CHECK: vector.scevcheck:
360
359
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STRIDE:%.*]], 1
361
- ; CHECK-NEXT: [[TMP0:%.*]] = or i1 false, [[IDENT_CHECK]]
362
- ; CHECK-NEXT: br i1 [[TMP0]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
360
+ ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
363
361
; CHECK: vector.ph:
364
362
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], 3
365
363
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
@@ -370,22 +368,22 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt
370
368
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
371
369
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
372
370
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
373
- ; CHECK-NEXT: [[TMP1 :%.*]] = add i32 [[INDEX]], 0
374
- ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1 ]], i32 [[N]])
375
- ; CHECK-NEXT: [[TMP2 :%.*]] = mul nuw nsw i32 [[TMP1 ]], [[STRIDE]]
376
- ; CHECK-NEXT: [[TMP3 :%.*]] = add nuw nsw i32 [[TMP2 ]], 2
377
- ; CHECK-NEXT: [[TMP4 :%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP3 ]]
378
- ; CHECK-NEXT: [[TMP5 :%.*]] = getelementptr inbounds i32, i32* [[TMP4 ]], i32 0
379
- ; CHECK-NEXT: [[TMP6 :%.*]] = bitcast i32* [[TMP5 ]] to <4 x i32>*
380
- ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP6 ]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
381
- ; CHECK-NEXT: [[TMP7 :%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
382
- ; CHECK-NEXT: [[TMP8 :%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP1 ]]
383
- ; CHECK-NEXT: [[TMP9 :%.*]] = getelementptr inbounds i32, i32* [[TMP8 ]], i32 0
384
- ; CHECK-NEXT: [[TMP10 :%.*]] = bitcast i32* [[TMP9 ]] to <4 x i32>*
385
- ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP7 ]], <4 x i32>* [[TMP10 ]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
371
+ ; CHECK-NEXT: [[TMP0 :%.*]] = add i32 [[INDEX]], 0
372
+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0 ]], i32 [[N]])
373
+ ; CHECK-NEXT: [[TMP1 :%.*]] = mul nuw nsw i32 [[TMP0 ]], [[STRIDE]]
374
+ ; CHECK-NEXT: [[TMP2 :%.*]] = add nuw nsw i32 [[TMP1 ]], 2
375
+ ; CHECK-NEXT: [[TMP3 :%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP2 ]]
376
+ ; CHECK-NEXT: [[TMP4 :%.*]] = getelementptr inbounds i32, i32* [[TMP3 ]], i32 0
377
+ ; CHECK-NEXT: [[TMP5 :%.*]] = bitcast i32* [[TMP4 ]] to <4 x i32>*
378
+ ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP5 ]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
379
+ ; CHECK-NEXT: [[TMP6 :%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
380
+ ; CHECK-NEXT: [[TMP7 :%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP0 ]]
381
+ ; CHECK-NEXT: [[TMP8 :%.*]] = getelementptr inbounds i32, i32* [[TMP7 ]], i32 0
382
+ ; CHECK-NEXT: [[TMP9 :%.*]] = bitcast i32* [[TMP8 ]] to <4 x i32>*
383
+ ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP6 ]], <4 x i32>* [[TMP9 ]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
386
384
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
387
- ; CHECK-NEXT: [[TMP11 :%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
388
- ; CHECK-NEXT: br i1 [[TMP11 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
385
+ ; CHECK-NEXT: [[TMP10 :%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
386
+ ; CHECK-NEXT: br i1 [[TMP10 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
389
387
; CHECK: middle.block:
390
388
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
391
389
; CHECK: scalar.ph:
@@ -396,8 +394,8 @@ define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapt
396
394
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], [[STRIDE]]
397
395
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
398
396
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
399
- ; CHECK-NEXT: [[TMP12 :%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
400
- ; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP12 ]]
397
+ ; CHECK-NEXT: [[TMP11 :%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
398
+ ; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP11 ]]
401
399
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
402
400
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
403
401
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
0 commit comments