@@ -115,3 +115,135 @@ for.body: ; preds = %for.body.preheader,
115
115
%exitcond.not = icmp eq i64 %indvars.iv.next , %wide.trip.count
116
116
br i1 %exitcond.not , label %for.cond.cleanup.loopexit , label %for.body
117
117
}
118
+
119
; Read-after-write aliasing test: %a is noalias, but %b (read) and %c (written)
; may alias each other. The loop copies a[i] into c[i], then reads b[i], so a
; runtime alias check between %b and %c is required. The CHECK lines below are
; auto-generated expectations (update_test_checks.py style) showing the
; vectorized loop guarded by a vscale x 8 x i1 alias lane mask that is AND-ed
; into the active-lane mask, with the index advanced by the popcount of the
; alias mask each iteration.
; NOTE(review): the expected IR contains `sdiv i64 [[SUB_ABS]], 0` — a divide
; by zero. For i16 elements the divisor would presumably be the element size
; (2); confirm whether this reflects a bug in the transform being tested.
define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
; CHECK-LABEL: define i32 @alias_mask_read_after_write(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C4:%.*]] = ptrtoint ptr [[C]] to i64
; CHECK-NEXT: [[B3:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64
; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[C2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP6]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
; CHECK-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[C4]], [[B3]]
; CHECK-NEXT: [[SUB_ABS:%.*]] = call i64 @llvm.abs.i64(i64 [[SUB_DIFF]], i1 false)
; CHECK-NEXT: [[DIFF:%.*]] = sdiv i64 [[SUB_ABS]], 0
; CHECK-NEXT: [[NEG_COMPARE:%.*]] = icmp eq i64 [[DIFF]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> poison, i1 [[NEG_COMPARE]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[DOTSPLATINSERT]], <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[DIFF]])
; CHECK-NEXT: [[TMP9:%.*]] = or <vscale x 8 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[WIDE_TRIP_COUNT]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP16:%.*]] = and <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], [[TMP9]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP17]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP18]], i32 2, <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> poison)
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[WIDE_MASKED_LOAD]], ptr [[TMP20]], i32 2, <vscale x 8 x i1> [[TMP16]])
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[TMP21]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP22]], i32 2, <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> poison)
; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 8 x i16> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP24:%.*]] = add <vscale x 8 x i16> [[TMP23]], [[WIDE_MASKED_LOAD5]]
; CHECK-NEXT: [[TMP25]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP24]], <vscale x 8 x i16> [[VEC_PHI]]
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i1> [[TMP9]] to <vscale x 8 x i8>
; CHECK-NEXT: [[TMP27:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[TMP26]])
; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP27]] to i64
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP28]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP29:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <vscale x 8 x i1> [[TMP29]], i32 0
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[TMP25]])
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i16 [ [[ADD9:%.*]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP32:%.*]] = zext i16 [[ADD9_LCSSA]] to i32
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[TOTAL_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP32]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[TOTAL_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TOTAL_020:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD9]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP33:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i16 [[TMP33]], ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP34:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[TMP33]], [[TOTAL_020]]
; CHECK-NEXT: [[ADD9]] = add i16 [[ADD]], [[TMP34]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
;
entry:
  ; Guard: loop runs only for n > 0; otherwise return 0 directly.
  %cmp19 = icmp sgt i32 %n, 0
  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  ; n is known positive here, so the zext is marked nneg.
  %wide.trip.count = zext nneg i32 %n to i64
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  ; Widen the i16 reduction result to the i32 return type.
  %add9.lcssa = phi i16 [ %add9, %for.body ]
  %0 = zext i16 %add9.lcssa to i32
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %total.0.lcssa = phi i32 [ 0, %entry ], [ %0, %for.cond.cleanup.loopexit ]
  ret i32 %total.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Per iteration: c[i] = a[i] (store may alias the later read of b[i]),
  ; then total += a[i] + b[i] as an i16 reduction.
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %total.020 = phi i16 [ 0, %for.body.preheader ], [ %add9, %for.body ]
  %arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
  %1 = load i16, ptr %arrayidx, align 2
  %arrayidx2 = getelementptr inbounds i16, ptr %c, i64 %indvars.iv
  store i16 %1, ptr %arrayidx2, align 2
  %arrayidx6 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
  %2 = load i16, ptr %arrayidx6, align 2
  %add = add i16 %1, %total.020
  %add9 = add i16 %add, %2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}