Skip to content

Commit 2e4c067

Browse files
committed
[LV] Add extra X86 cost tests for any_of reduction and multi-exit loops.
Add extra test coverage to ensure decisions do not change when transitioning to a VPlan-based cost model.
1 parent c5aeca7 commit 2e4c067

File tree

1 file changed

+210
-1
lines changed

1 file changed

+210
-1
lines changed

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 210 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
233233
; CHECK-NEXT: [[ADD4]] = fadd fast float [[ADD]], [[T2]]
234234
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 32
235235
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[T0]]
236-
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
236+
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
237237
; CHECK: loopexit:
238238
; CHECK-NEXT: [[ADD4_LCSSA:%.*]] = phi float [ [[ADD4]], [[FOR]] ], [ [[TMP124]], [[MIDDLE_BLOCK]] ]
239239
; CHECK-NEXT: br label [[FOR_END]]
@@ -271,3 +271,212 @@ for.end:
271271
ret float %s.0.lcssa
272272
}
273273

274+
; Multi-exit loop cost test. The loop can leave through two exits:
;   * the header, when %iv.1.wide is no longer unsigned-less-than %A (%ec.1);
;   * the latch, when zext(%iv.1.next) is no longer unsigned-less-than %B (%ec.2).
; Every iteration loads an i64 from each of %src.1 and %src.2 (the pointers
; are used directly, never advanced) and stores
; zext((%l.2 != 0) & (%l.1 == 0)) as an i8 to %dst.
; The autogenerated assertions below expect: a minimum-iteration guard, SCEV
; and memory runtime checks, a vector body operating on <2 x i64> whose index
; advances by 4 per iteration, and a middle.block that branches
; unconditionally to scalar.ph so the scalar loop always runs the remainder.
define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
275+
; CHECK-LABEL: @multi_exit(
276+
; CHECK-NEXT: entry:
277+
; CHECK-NEXT: [[UMAX6:%.*]] = call i64 @llvm.umax.i64(i64 [[B:%.*]], i64 1)
278+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX6]], -1
279+
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
280+
; CHECK-NEXT: [[UMIN7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
281+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN7]], 1
282+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 30
283+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
284+
; CHECK: vector.scevcheck:
285+
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
286+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1
287+
; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
288+
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]])
289+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
290+
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[UMIN]] to i32
291+
; CHECK-NEXT: [[TMP7:%.*]] = add i32 1, [[TMP6]]
292+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 1
293+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
294+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
295+
; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
296+
; CHECK: vector.memcheck:
297+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1
298+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 8
299+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8
300+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
301+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1]], [[SCEVGEP]]
302+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
303+
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
304+
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP]]
305+
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
306+
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
307+
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
308+
; CHECK: vector.ph:
309+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
310+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
311+
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 4, i64 [[N_MOD_VF]]
312+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP12]]
313+
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
314+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
315+
; CHECK: vector.body:
316+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
317+
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META4:![0-9]+]]
318+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP13]], i64 0
319+
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer
320+
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META7:![0-9]+]]
321+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0
322+
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT13]], <2 x i64> poison, <2 x i32> zeroinitializer
323+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
324+
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
325+
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT14]], zeroinitializer
326+
; CHECK-NEXT: [[TMP18:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT14]], zeroinitializer
327+
; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i1> [[TMP17]], [[TMP15]]
328+
; CHECK-NEXT: [[TMP20:%.*]] = and <2 x i1> [[TMP18]], [[TMP16]]
329+
; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i1> [[TMP19]] to <2 x i8>
330+
; CHECK-NEXT: [[TMP22:%.*]] = zext <2 x i1> [[TMP20]] to <2 x i8>
331+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i8> [[TMP22]], i32 1
332+
; CHECK-NEXT: store i8 [[TMP23]], ptr [[DST]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
333+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
334+
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
335+
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
336+
; CHECK: middle.block:
337+
; CHECK-NEXT: br label [[SCALAR_PH]]
338+
; CHECK: scalar.ph:
339+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
340+
; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
341+
; CHECK-NEXT: br label [[LOOP:%.*]]
342+
; CHECK: loop:
343+
; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
344+
; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
345+
; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A]]
346+
; CHECK-NEXT: br i1 [[EC_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
347+
; CHECK: loop.latch:
348+
; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1]], align 8
349+
; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2]], align 8
350+
; CHECK-NEXT: [[CMP55_US:%.*]] = icmp eq i64 [[L_1]], 0
351+
; CHECK-NEXT: [[CMP_I_US:%.*]] = icmp ne i64 [[L_2]], 0
352+
; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP_I_US]], [[CMP55_US]]
353+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[AND]] to i8
354+
; CHECK-NEXT: store i8 [[EXT]], ptr [[DST]], align 1
355+
; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
356+
; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
357+
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
358+
; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
359+
; CHECK: exit:
360+
; CHECK-NEXT: ret void
361+
;
362+
entry:
363+
br label %loop
364+
365+
loop:
366+
; Two induction phis: an i64 one fed by the zero-extended i32 increment, and
; the i32 counter itself.
%iv.1.wide = phi i64 [ 0, %entry ], [ %iv.1.next.wide, %loop.latch ]
367+
%iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop.latch ]
368+
; First exit (header): leave once the wide IV reaches %A.
%ec.1 = icmp ult i64 %iv.1.wide, %A
369+
br i1 %ec.1, label %loop.latch, label %exit
370+
371+
loop.latch:
372+
; Both loads read the same addresses on every iteration.
%l.1 = load i64, ptr %src.1, align 8
373+
%l.2 = load i64, ptr %src.2, align 8
374+
%cmp55.us = icmp eq i64 %l.1, 0
375+
%cmp.i.us = icmp ne i64 %l.2, 0
376+
%and = and i1 %cmp.i.us, %cmp55.us
377+
%ext = zext i1 %and to i8
378+
; The store also targets a fixed address (%dst).
store i8 %ext, ptr %dst, align 1
379+
; The increment carries no nuw/nsw flags.
%iv.1.next = add i32 %iv.1, 1
380+
%iv.1.next.wide = zext i32 %iv.1.next to i64
381+
; Second exit (latch): leave once the zero-extended next IV reaches %B.
%ec.2 = icmp ult i64 %iv.1.next.wide, %B
382+
br i1 %ec.2, label %loop, label %exit
383+
384+
exit:
385+
ret void
386+
}
387+
388+
; Any-of style select reduction cost test. %any.of starts as false and is
; updated each iteration by select(%l == null, %any.of, false); the value of
; %any.of.next from the last iteration is returned.
; The loop is driven by a pointer induction variable that advances 40 bytes
; per iteration; the pointer that gets tested is loaded from 8 bytes past
; %ptr.iv.
; NOTE: the exit compare tests the pre-increment %ptr.iv against %end, so the
; iteration in which %ptr.iv == %end still performs its load before exiting.
; The autogenerated assertions below expect: a vector body with two <2 x i1>
; reduction phis, scalar pointer loads assembled into <2 x ptr> vectors with
; insertelement, an index step of 4, and a final llvm.vector.reduce.or in
; middle.block followed by an unconditional branch to scalar.ph.
define i1 @any_of_cost(ptr %start, ptr %end) #0 {
389+
; CHECK-LABEL: @any_of_cost(
390+
; CHECK-NEXT: entry:
391+
; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64
392+
; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
393+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
394+
; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], 40
395+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
396+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 4
397+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
398+
; CHECK: vector.ph:
399+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
400+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
401+
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]]
402+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
403+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 40
404+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
405+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
406+
; CHECK: vector.body:
407+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
408+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
409+
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
410+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 40
411+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
412+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 40
413+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 80
414+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 120
415+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
416+
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]]
417+
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
418+
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP9]]
419+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8
420+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 8
421+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 8
422+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP6]], i64 8
423+
; CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP10]], align 8
424+
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP11]], align 8
425+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP14]], i32 0
426+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP15]], i32 1
427+
; CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP12]], align 8
428+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP13]], align 8
429+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
430+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x ptr> [[TMP20]], ptr [[TMP19]], i32 1
431+
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <2 x ptr> [[TMP17]], zeroinitializer
432+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <2 x ptr> [[TMP21]], zeroinitializer
433+
; CHECK-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[TMP22]], <i1 true, i1 true>
434+
; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP23]], <i1 true, i1 true>
435+
; CHECK-NEXT: [[TMP26]] = or <2 x i1> [[VEC_PHI]], [[TMP24]]
436+
; CHECK-NEXT: [[TMP27]] = or <2 x i1> [[VEC_PHI3]], [[TMP25]]
437+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
438+
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
439+
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
440+
; CHECK: middle.block:
441+
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i1> [[TMP27]], [[TMP26]]
442+
; CHECK-NEXT: [[TMP29:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[BIN_RDX]])
443+
; CHECK-NEXT: [[TMP30:%.*]] = freeze i1 [[TMP29]]
444+
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP30]], i1 false, i1 false
445+
; CHECK-NEXT: br label [[SCALAR_PH]]
446+
; CHECK: scalar.ph:
447+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
448+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
449+
; CHECK-NEXT: br label [[LOOP:%.*]]
450+
; CHECK: loop:
451+
; CHECK-NEXT: [[ANY_OF:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANY_OF_NEXT:%.*]], [[LOOP]] ]
452+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
453+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 8
454+
; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[GEP]], align 8
455+
; CHECK-NEXT: [[CMP13_NOT_NOT:%.*]] = icmp eq ptr [[L]], null
456+
; CHECK-NEXT: [[ANY_OF_NEXT]] = select i1 [[CMP13_NOT_NOT]], i1 [[ANY_OF]], i1 false
457+
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 40
458+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
459+
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
460+
; CHECK: exit:
461+
; CHECK-NEXT: [[ANY_OF_NEXT_LCSSA:%.*]] = phi i1 [ [[ANY_OF_NEXT]], [[LOOP]] ]
462+
; CHECK-NEXT: ret i1 [[ANY_OF_NEXT_LCSSA]]
463+
;
464+
entry:
465+
br label %loop
466+
467+
loop:
468+
; Reduction phi: starts false and carries the select result across iterations.
%any.of = phi i1 [ false, %entry ], [ %any.of.next, %loop ]
469+
; Pointer induction variable starting at %start.
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
470+
; The tested pointer is loaded from byte offset 8 relative to %ptr.iv.
%gep = getelementptr i8, ptr %ptr.iv, i64 8
471+
%l = load ptr, ptr %gep, align 8
472+
%cmp13.not.not = icmp eq ptr %l, null
473+
; Keep the running value when %l is null; otherwise select the constant false.
%any.of.next = select i1 %cmp13.not.not, i1 %any.of, i1 false
474+
; Advance by 40 bytes per iteration.
%ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 40
475+
; Exit test uses the current (pre-increment) %ptr.iv, not %ptr.iv.next.
%cmp.not = icmp eq ptr %ptr.iv, %end
476+
br i1 %cmp.not, label %exit, label %loop
477+
478+
exit:
479+
ret i1 %any.of.next
480+
}
481+
482+
; Attribute group #0, attached to @multi_exit and @any_of_cost in this file:
; sets the "target-cpu" function attribute to "penryn".
attributes #0 = { "target-cpu"="penryn" }

0 commit comments

Comments
 (0)