@@ -31,6 +31,8 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
31
31
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP2]]
32
32
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET:%.*]], i64 0
33
33
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
34
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET]], i64 0
35
+ ; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT9]], <8 x i16> poison, <8 x i32> zeroinitializer
34
36
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
35
37
; CHECK: vector.body:
36
38
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -42,7 +44,7 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
42
44
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8
43
45
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP5]], align 2
44
46
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]])
45
- ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT]])
47
+ ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT10]])
46
48
; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[NEXT_GEP6]], align 2
47
49
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP6]], i64 8
48
50
; CHECK-NEXT: store <8 x i16> [[TMP7]], ptr [[TMP8]], align 2
@@ -116,6 +118,8 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
116
118
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967264
117
119
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET:%.*]], i64 0
118
120
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
121
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET]], i64 0
122
+ ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT6]], <16 x i8> poison, <16 x i32> zeroinitializer
119
123
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
120
124
; CHECK: vector.body:
121
125
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -125,7 +129,7 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
125
129
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16
126
130
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP1]], align 2
127
131
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]])
128
- ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT]])
132
+ ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT7]])
129
133
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[NEXT_GEP3]], align 2
130
134
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 16
131
135
; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 2
@@ -136,45 +140,45 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
136
140
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
137
141
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
138
142
; CHECK: vec.epilog.iter.check:
139
- ; CHECK-NEXT: [[IND_END18:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
140
- ; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
141
- ; CHECK-NEXT: [[DOTCAST11:%.*]] = trunc i64 [[N_VEC]] to i32
142
- ; CHECK-NEXT: [[IND_END12:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST11]]
143
+ ; CHECK-NEXT: [[IND_END20:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
144
+ ; CHECK-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
145
+ ; CHECK-NEXT: [[DOTCAST13:%.*]] = trunc i64 [[N_VEC]] to i32
146
+ ; CHECK-NEXT: [[IND_END14:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST13]]
143
147
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 24
144
148
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
145
149
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
146
150
; CHECK: vec.epilog.ph:
147
151
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
148
- ; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP0]], 4294967288
149
- ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC9]] to i32
150
- ; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
151
- ; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC9]]
152
- ; CHECK-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC9]]
153
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT25:%.*]] = insertelement <8 x i8> poison, i8 [[OFFSET]], i64 0
154
- ; CHECK-NEXT: [[BROADCAST_SPLAT26:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT25]], <8 x i8> poison, <8 x i32> zeroinitializer
152
+ ; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[TMP0]], 4294967288
153
+ ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC11]] to i32
154
+ ; CHECK-NEXT: [[IND_END12:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
155
+ ; CHECK-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC11]]
156
+ ; CHECK-NEXT: [[IND_END19:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC11]]
157
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <8 x i8> poison, i8 [[OFFSET]], i64 0
158
+ ; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT27]], <8 x i8> poison, <8 x i32> zeroinitializer
155
159
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
156
160
; CHECK: vec.epilog.vector.body:
157
- ; CHECK-NEXT: [[INDEX21:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
158
- ; CHECK-NEXT: [[NEXT_GEP22:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX21]]
159
- ; CHECK-NEXT: [[NEXT_GEP23:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX21]]
160
- ; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x i8>, ptr [[NEXT_GEP22]], align 2
161
- ; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD24]], <8 x i8> [[BROADCAST_SPLAT26]])
162
- ; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[NEXT_GEP23]], align 2
163
- ; CHECK-NEXT: [[INDEX_NEXT27]] = add nuw i64 [[INDEX21]], 8
164
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT27]], [[N_VEC9]]
161
+ ; CHECK-NEXT: [[INDEX23:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
162
+ ; CHECK-NEXT: [[NEXT_GEP24:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX23]]
163
+ ; CHECK-NEXT: [[NEXT_GEP25:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX23]]
164
+ ; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <8 x i8>, ptr [[NEXT_GEP24]], align 2
165
+ ; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD26]], <8 x i8> [[BROADCAST_SPLAT28]])
166
+ ; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[NEXT_GEP25]], align 2
167
+ ; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX23]], 8
168
+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC11]]
165
169
; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
166
170
; CHECK: vec.epilog.middle.block:
167
- ; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[N_VEC9]], [[TMP0]]
168
- ; CHECK-NEXT: br i1 [[CMP_N20]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
171
+ ; CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[N_VEC11]], [[TMP0]]
172
+ ; CHECK-NEXT: br i1 [[CMP_N22]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
169
173
; CHECK: vec.epilog.scalar.ph:
170
- ; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i32 [ [[IND_END10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
171
- ; CHECK-NEXT: [[BC_RESUME_VAL16:%.*]] = phi ptr [ [[IND_END14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
172
- ; CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi ptr [ [[IND_END17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
174
+ ; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i32 [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
175
+ ; CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi ptr [ [[IND_END16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
176
+ ; CHECK-NEXT: [[BC_RESUME_VAL21:%.*]] = phi ptr [ [[IND_END19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END20]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
173
177
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
174
178
; CHECK: while.body:
175
- ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL13]], [[VEC_EPILOG_SCALAR_PH]] ]
176
- ; CHECK-NEXT: [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL16]], [[VEC_EPILOG_SCALAR_PH]] ]
177
- ; CHECK-NEXT: [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL19]], [[VEC_EPILOG_SCALAR_PH]] ]
179
+ ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL15]], [[VEC_EPILOG_SCALAR_PH]] ]
180
+ ; CHECK-NEXT: [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL18]], [[VEC_EPILOG_SCALAR_PH]] ]
181
+ ; CHECK-NEXT: [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL21]], [[VEC_EPILOG_SCALAR_PH]] ]
178
182
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_08]], i64 1
179
183
; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[PSRC_ADDR_08]], align 2
180
184
; CHECK-NEXT: [[TMP9:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TMP8]], i8 [[OFFSET]])
0 commit comments