Skip to content

Commit b054363

Browse files
committed
[LV] Add tests showing incorrect vector interleaving with early exits.
When interleaving is forced for early-exit loops, we currently create incorrect code. Test coverage for scalable vectors is added as AArch64 specific test.
1 parent 068af5b commit b054363

File tree

2 files changed

+823
-0
lines changed

2 files changed

+823
-0
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 4
2+
; RUN: opt -p loop-vectorize -enable-early-exit-vectorization -force-vector-interleave=4 -S %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
declare void @init_mem(ptr, i64);
7+
8+
; Scalar semantics: starting at index 3, compare the bytes of %p1 and %p2;
; return the index of the first mismatch, or 67 if indices 3..512 all match
; (the counted exit fires when %index.next reaches 513).
;
; NOTE(review): the RUN line forces -force-vector-interleave=4, yet the
; autogenerated vector.body CHECKs below contain only a single
; <vscale x 16 x i8> load/compare per pointer and the early-exit condition
; is reduced from just that one part ([[TMP11]]) — i.e. the extra
; interleaved parts are absent. This pins down the incorrect interleaving
; for early-exit loops described in the commit message; do not "fix" these
; CHECK lines by hand — regenerate with update_test_checks.py once the
; vectorizer bug is fixed.
define i64 @same_exit_block_pre_inc_use1() #0 {
9+
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1(
10+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
13+
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
14+
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
15+
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
16+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
17+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 64
18+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 510, [[TMP1]]
19+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
20+
; CHECK: vector.ph:
21+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
22+
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 64
23+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]]
24+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]]
25+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
26+
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 64
27+
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]]
28+
; CHECK-NEXT: br label [[LOOP:%.*]]
29+
; CHECK: vector.body:
30+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
31+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
32+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
33+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
34+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
35+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
36+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
37+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
38+
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
39+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]]
40+
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP11]])
41+
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
42+
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
43+
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
44+
; CHECK: middle.split:
45+
; CHECK-NEXT: br i1 [[TMP12]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_INC:%.*]]
46+
; CHECK: middle.block:
47+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 510, [[N_VEC]]
48+
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
49+
; CHECK: vector.early.exit:
50+
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP11]], i1 true)
51+
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP15]]
52+
; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]]
53+
; CHECK-NEXT: br label [[LOOP_END]]
54+
; CHECK: scalar.ph:
55+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 3, [[ENTRY:%.*]] ]
56+
; CHECK-NEXT: br label [[LOOP1:%.*]]
57+
; CHECK: loop:
58+
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ [[INDEX]], [[SCALAR_PH]] ]
59+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX2]]
60+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
61+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX2]]
62+
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
63+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
64+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC1]], label [[LOOP_END]]
65+
; CHECK: loop.inc:
66+
; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1
67+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 513
68+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
69+
; CHECK: loop.end:
70+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ], [ 67, [[LOOP_INC]] ], [ [[TMP17]], [[VECTOR_EARLY_EXIT]] ]
71+
; CHECK-NEXT: ret i64 [[RETVAL]]
72+
;
73+
entry:
74+
%p1 = alloca [1024 x i8]
75+
%p2 = alloca [1024 x i8]
76+
call void @init_mem(ptr %p1, i64 1024)
77+
call void @init_mem(ptr %p2, i64 1024)
78+
br label %loop
79+
80+
; Early-exit loop: leaves via %loop.end on the first byte mismatch.
loop:
81+
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
82+
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
83+
%ld1 = load i8, ptr %arrayidx, align 1
84+
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
85+
%ld2 = load i8, ptr %arrayidx1, align 1
86+
%cmp3 = icmp eq i8 %ld1, %ld2
87+
br i1 %cmp3, label %loop.inc, label %loop.end
88+
89+
; Counted latch: iterates %index = 3..512, then falls through to %loop.end.
loop.inc:
90+
%index.next = add i64 %index, 1
91+
%exitcond = icmp ne i64 %index.next, 513
92+
br i1 %exitcond, label %loop, label %loop.end
93+
94+
; Result: mismatch index when arriving from %loop, 67 from the counted exit.
loop.end:
95+
%retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
96+
ret i64 %retval
97+
}
98+
99+
attributes #0 = { "target-features"="+sve" vscale_range(1,16) }

0 commit comments

Comments
 (0)