Skip to content

Commit f090625

Browse files
committed
Add read-after-write test
1 parent df78d68 commit f090625

File tree

1 file changed

+132
-0
lines changed

1 file changed

+132
-0
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/alias_mask.ll

Lines changed: 132 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,3 +115,135 @@ for.body: ; preds = %for.body.preheader,
115115
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
116116
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
117117
}
118+
119+
; Read-after-write test: every iteration loads a[i], stores it to c[i], then
; loads b[i] and adds both loaded values into an i16 running total that is
; returned zero-extended to i32 (0 when n <= 0).
; %a is marked noalias, but %b and %c carry no such guarantee, so the load from
; %b may observe the preceding store to %c.
; The expected output below has the loop vectorized with its memory accesses
; predicated on the active lane mask ANDed with a lane mask built from the
; %c - %b pointer difference; the induction variable advances by the number of
; set lanes in that pointer-difference mask.
; NOTE(review): the expected [[DIFF]] computation divides by the constant 0
; ("sdiv i64 [[SUB_ABS]], 0"). Presumably the divisor should be the i16 access
; size in bytes -- confirm against the pass before relying on this output.
define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
120+
; CHECK-LABEL: define i32 @alias_mask_read_after_write(
121+
; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
122+
; CHECK-NEXT: entry:
123+
; CHECK-NEXT: [[C4:%.*]] = ptrtoint ptr [[C]] to i64
124+
; CHECK-NEXT: [[B3:%.*]] = ptrtoint ptr [[B]] to i64
125+
; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64
126+
; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
127+
; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[N]], 0
128+
; CHECK-NEXT: br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
129+
; CHECK: for.body.preheader:
130+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
131+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
132+
; CHECK: vector.memcheck:
133+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
134+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
135+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
136+
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[C2]]
137+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
138+
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
139+
; CHECK: vector.ph:
140+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
141+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
142+
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
143+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP6]]
144+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
145+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
146+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
147+
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
148+
; CHECK-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[C4]], [[B3]]
149+
; CHECK-NEXT: [[SUB_ABS:%.*]] = call i64 @llvm.abs.i64(i64 [[SUB_DIFF]], i1 false)
150+
; CHECK-NEXT: [[DIFF:%.*]] = sdiv i64 [[SUB_ABS]], 0
151+
; CHECK-NEXT: [[NEG_COMPARE:%.*]] = icmp eq i64 [[DIFF]], 0
152+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> poison, i1 [[NEG_COMPARE]], i64 0
153+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[DOTSPLATINSERT]], <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
154+
; CHECK-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[DIFF]])
155+
; CHECK-NEXT: [[TMP9:%.*]] = or <vscale x 8 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
156+
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
157+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8
158+
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[TMP11]]
159+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[WIDE_TRIP_COUNT]], [[TMP11]]
160+
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
161+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
162+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
163+
; CHECK: vector.body:
164+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
165+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
166+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
167+
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
168+
; CHECK-NEXT: [[TMP16:%.*]] = and <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], [[TMP9]]
169+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP15]]
170+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP17]], i32 0
171+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP18]], i32 2, <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> poison)
172+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[TMP15]]
173+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i32 0
174+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[WIDE_MASKED_LOAD]], ptr [[TMP20]], i32 2, <vscale x 8 x i1> [[TMP16]])
175+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[TMP15]]
176+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[TMP21]], i32 0
177+
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP22]], i32 2, <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> poison)
178+
; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 8 x i16> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
179+
; CHECK-NEXT: [[TMP24:%.*]] = add <vscale x 8 x i16> [[TMP23]], [[WIDE_MASKED_LOAD5]]
180+
; CHECK-NEXT: [[TMP25]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP24]], <vscale x 8 x i16> [[VEC_PHI]]
181+
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i1> [[TMP9]] to <vscale x 8 x i8>
182+
; CHECK-NEXT: [[TMP27:%.*]] = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> [[TMP26]])
183+
; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP27]] to i64
184+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP28]]
185+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP14]])
186+
; CHECK-NEXT: [[TMP29:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
187+
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <vscale x 8 x i1> [[TMP29]], i32 0
188+
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
189+
; CHECK: middle.block:
190+
; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[TMP25]])
191+
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
192+
; CHECK: scalar.ph:
193+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
194+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
195+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
196+
; CHECK: for.cond.cleanup.loopexit:
197+
; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i16 [ [[ADD9:%.*]], [[FOR_BODY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
198+
; CHECK-NEXT: [[TMP32:%.*]] = zext i16 [[ADD9_LCSSA]] to i32
199+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
200+
; CHECK: for.cond.cleanup:
201+
; CHECK-NEXT: [[TOTAL_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP32]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
202+
; CHECK-NEXT: ret i32 [[TOTAL_0_LCSSA]]
203+
; CHECK: for.body:
204+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
205+
; CHECK-NEXT: [[TOTAL_020:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD9]], [[FOR_BODY]] ]
206+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV]]
207+
; CHECK-NEXT: [[TMP33:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
208+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[INDVARS_IV]]
209+
; CHECK-NEXT: store i16 [[TMP33]], ptr [[ARRAYIDX2]], align 2
210+
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[INDVARS_IV]]
211+
; CHECK-NEXT: [[TMP34:%.*]] = load i16, ptr [[ARRAYIDX6]], align 2
212+
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[TMP33]], [[TOTAL_020]]
213+
; CHECK-NEXT: [[ADD9]] = add i16 [[ADD]], [[TMP34]]
214+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
215+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
216+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
217+
;
218+
entry:
219+
; Bypass the loop when n <= 0; the function then returns 0.
%cmp19 = icmp sgt i32 %n, 0
220+
br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup
221+
222+
for.body.preheader: ; preds = %entry
223+
%wide.trip.count = zext nneg i32 %n to i64
224+
br label %for.body
225+
226+
for.cond.cleanup.loopexit: ; preds = %for.body
227+
%add9.lcssa = phi i16 [ %add9, %for.body ]
228+
; Widen the i16 running total to the i32 return type.
%0 = zext i16 %add9.lcssa to i32
229+
br label %for.cond.cleanup
230+
231+
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
232+
%total.0.lcssa = phi i32 [ 0, %entry ], [ %0, %for.cond.cleanup.loopexit ]
233+
ret i32 %total.0.lcssa
234+
235+
for.body: ; preds = %for.body.preheader, %for.body
236+
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
237+
%total.020 = phi i16 [ 0, %for.body.preheader ], [ %add9, %for.body ]
238+
%arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
239+
%1 = load i16, ptr %arrayidx, align 2
240+
%arrayidx2 = getelementptr inbounds i16, ptr %c, i64 %indvars.iv
241+
; Write c[i] first ...
store i16 %1, ptr %arrayidx2, align 2
242+
%arrayidx6 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
243+
; ... then read b[i]: this is the read-after-write pair, since %b and %c are
; not marked noalias.
%2 = load i16, ptr %arrayidx6, align 2
244+
%add = add i16 %1, %total.020
245+
%add9 = add i16 %add, %2
246+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
247+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
248+
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
249+
}

0 commit comments

Comments
 (0)