Skip to content

Commit 49d491d

Browse files
committed
Add more tests and address review comments
1 parent 3babd98 commit 49d491d

File tree

3 files changed

+737
-2
lines changed

3 files changed

+737
-2
lines changed

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,10 +507,9 @@ Value *LoopIdiomVectorize::createPredicatedFindMismatch(
507507

508508
auto *VectorLoadType = ScalableVectorType::get(LoadType, ByteCompareVF);
509509
auto *VF = ConstantInt::get(I32Type, ByteCompareVF);
510-
auto *IsScalable = ConstantInt::getBool(Builder.getContext(), true);
511510

512511
Value *VL = Builder.CreateIntrinsic(Intrinsic::experimental_get_vector_length,
513-
{I64Type}, {AVL, VF, IsScalable});
512+
{I64Type}, {AVL, VF, Builder.getTrue()});
514513
Value *GepOffset = VectorIndexPhi;
515514

516515
Value *VectorLhsGep =
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
3+
4+
; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll
5+
6+
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) {
7+
; CHECK-LABEL: compare_bytes_simple:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: sext.w a4, a3
10+
; CHECK-NEXT: addiw a5, a2, 1
11+
; CHECK-NEXT: bltu a4, a5, .LBB0_7
12+
; CHECK-NEXT: # %bb.1: # %mismatch_mem_check
13+
; CHECK-NEXT: slli a2, a5, 32
14+
; CHECK-NEXT: srli a2, a2, 32
15+
; CHECK-NEXT: slli a6, a3, 32
16+
; CHECK-NEXT: srli a6, a6, 32
17+
; CHECK-NEXT: add a7, a0, a2
18+
; CHECK-NEXT: add t0, a0, a6
19+
; CHECK-NEXT: srli a7, a7, 12
20+
; CHECK-NEXT: srli t0, t0, 12
21+
; CHECK-NEXT: bne a7, t0, .LBB0_7
22+
; CHECK-NEXT: # %bb.2: # %mismatch_mem_check
23+
; CHECK-NEXT: add a7, a1, a2
24+
; CHECK-NEXT: add t0, a1, a6
25+
; CHECK-NEXT: srli a7, a7, 12
26+
; CHECK-NEXT: srli t0, t0, 12
27+
; CHECK-NEXT: bne a7, t0, .LBB0_7
28+
; CHECK-NEXT: .LBB0_3: # %mismatch_vec_loop
29+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
30+
; CHECK-NEXT: sub a4, a6, a2
31+
; CHECK-NEXT: vsetvli a4, a4, e8, m2, ta, ma
32+
; CHECK-NEXT: add a5, a0, a2
33+
; CHECK-NEXT: vle8.v v8, (a5)
34+
; CHECK-NEXT: add a5, a1, a2
35+
; CHECK-NEXT: vle8.v v10, (a5)
36+
; CHECK-NEXT: vmsne.vv v12, v8, v10
37+
; CHECK-NEXT: vfirst.m a7, v12
38+
; CHECK-NEXT: mv a5, a4
39+
; CHECK-NEXT: bltz a7, .LBB0_5
40+
; CHECK-NEXT: # %bb.4: # %mismatch_vec_loop
41+
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
42+
; CHECK-NEXT: mv a5, a7
43+
; CHECK-NEXT: .LBB0_5: # %mismatch_vec_loop
44+
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
45+
; CHECK-NEXT: sext.w a7, a5
46+
; CHECK-NEXT: bne a7, a4, .LBB0_11
47+
; CHECK-NEXT: # %bb.6: # %mismatch_vec_loop_inc
48+
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
49+
; CHECK-NEXT: add a2, a2, a4
50+
; CHECK-NEXT: bne a2, a6, .LBB0_3
51+
; CHECK-NEXT: j .LBB0_9
52+
; CHECK-NEXT: .LBB0_7: # %mismatch_loop
53+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
54+
; CHECK-NEXT: slli a2, a5, 32
55+
; CHECK-NEXT: srli a2, a2, 32
56+
; CHECK-NEXT: add a6, a0, a2
57+
; CHECK-NEXT: lbu a6, 0(a6)
58+
; CHECK-NEXT: add a2, a1, a2
59+
; CHECK-NEXT: lbu a2, 0(a2)
60+
; CHECK-NEXT: bne a6, a2, .LBB0_10
61+
; CHECK-NEXT: # %bb.8: # %mismatch_loop_inc
62+
; CHECK-NEXT: # in Loop: Header=BB0_7 Depth=1
63+
; CHECK-NEXT: addiw a5, a5, 1
64+
; CHECK-NEXT: bne a4, a5, .LBB0_7
65+
; CHECK-NEXT: .LBB0_9: # %while.end
66+
; CHECK-NEXT: mv a0, a3
67+
; CHECK-NEXT: ret
68+
; CHECK-NEXT: .LBB0_10:
69+
; CHECK-NEXT: mv a0, a5
70+
; CHECK-NEXT: ret
71+
; CHECK-NEXT: .LBB0_11: # %mismatch_vec_loop_found
72+
; CHECK-NEXT: slli a5, a5, 32
73+
; CHECK-NEXT: srli a3, a5, 32
74+
; CHECK-NEXT: add a0, a2, a3
75+
; CHECK-NEXT: ret
76+
entry:
77+
%0 = add i32 %len, 1
78+
br label %mismatch_min_it_check
79+
80+
mismatch_min_it_check: ; preds = %entry
81+
%1 = zext i32 %0 to i64
82+
%2 = zext i32 %n to i64
83+
%3 = icmp ule i32 %0, %n
84+
br i1 %3, label %mismatch_mem_check, label %mismatch_loop_pre
85+
86+
mismatch_mem_check: ; preds = %mismatch_min_it_check
87+
%4 = getelementptr i8, ptr %a, i64 %1
88+
%5 = getelementptr i8, ptr %b, i64 %1
89+
%6 = ptrtoint ptr %5 to i64
90+
%7 = ptrtoint ptr %4 to i64
91+
%8 = getelementptr i8, ptr %a, i64 %2
92+
%9 = getelementptr i8, ptr %b, i64 %2
93+
%10 = ptrtoint ptr %8 to i64
94+
%11 = ptrtoint ptr %9 to i64
95+
%12 = lshr i64 %7, 12
96+
%13 = lshr i64 %10, 12
97+
%14 = lshr i64 %6, 12
98+
%15 = lshr i64 %11, 12
99+
%16 = icmp ne i64 %12, %13
100+
%17 = icmp ne i64 %14, %15
101+
%18 = or i1 %16, %17
102+
br i1 %18, label %mismatch_loop_pre, label %mismatch_vec_loop_preheader
103+
104+
mismatch_vec_loop_preheader: ; preds = %mismatch_mem_check
105+
br label %mismatch_vec_loop
106+
107+
mismatch_vec_loop: ; preds = %mismatch_vec_loop_inc, %mismatch_vec_loop_preheader
108+
%mismatch_vector_index = phi i64 [ %1, %mismatch_vec_loop_preheader ], [ %25, %mismatch_vec_loop_inc ]
109+
%avl = sub nuw nsw i64 %2, %mismatch_vector_index
110+
%19 = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 16, i1 true)
111+
%20 = getelementptr inbounds i8, ptr %a, i64 %mismatch_vector_index
112+
%lhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %20, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
113+
%21 = getelementptr inbounds i8, ptr %b, i64 %mismatch_vector_index
114+
%rhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %21, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
115+
%mismatch.cmp = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> %lhs.load, <vscale x 16 x i8> %rhs.load, metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
116+
%22 = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %mismatch.cmp, i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
117+
%23 = icmp ne i32 %22, %19
118+
br i1 %23, label %mismatch_vec_loop_found, label %mismatch_vec_loop_inc
119+
120+
mismatch_vec_loop_inc: ; preds = %mismatch_vec_loop
121+
%24 = zext i32 %19 to i64
122+
%25 = add nuw nsw i64 %mismatch_vector_index, %24
123+
%26 = icmp ne i64 %25, %2
124+
br i1 %26, label %mismatch_vec_loop, label %mismatch_end
125+
126+
mismatch_vec_loop_found: ; preds = %mismatch_vec_loop
127+
%ctz = phi i32 [ %22, %mismatch_vec_loop ]
128+
%mismatch_vector_index1 = phi i64 [ %mismatch_vector_index, %mismatch_vec_loop ]
129+
%27 = zext i32 %ctz to i64
130+
%28 = add nuw nsw i64 %mismatch_vector_index1, %27
131+
%29 = trunc i64 %28 to i32
132+
br label %mismatch_end
133+
134+
mismatch_loop_pre: ; preds = %mismatch_mem_check, %mismatch_min_it_check
135+
br label %mismatch_loop
136+
137+
mismatch_loop: ; preds = %mismatch_loop_inc, %mismatch_loop_pre
138+
%mismatch_index = phi i32 [ %0, %mismatch_loop_pre ], [ %36, %mismatch_loop_inc ]
139+
%30 = zext i32 %mismatch_index to i64
140+
%31 = getelementptr inbounds i8, ptr %a, i64 %30
141+
%32 = load i8, ptr %31, align 1
142+
%33 = getelementptr inbounds i8, ptr %b, i64 %30
143+
%34 = load i8, ptr %33, align 1
144+
%35 = icmp eq i8 %32, %34
145+
br i1 %35, label %mismatch_loop_inc, label %mismatch_end
146+
147+
mismatch_loop_inc: ; preds = %mismatch_loop
148+
%36 = add i32 %mismatch_index, 1
149+
%37 = icmp eq i32 %36, %n
150+
br i1 %37, label %mismatch_end, label %mismatch_loop
151+
152+
mismatch_end: ; preds = %mismatch_loop_inc, %mismatch_loop, %mismatch_vec_loop_found, %mismatch_vec_loop_inc
153+
%mismatch_result = phi i32 [ %n, %mismatch_loop_inc ], [ %mismatch_index, %mismatch_loop ], [ %n, %mismatch_vec_loop_inc ], [ %29, %mismatch_vec_loop_found ]
154+
br i1 true, label %byte.compare, label %while.cond
155+
156+
while.cond: ; preds = %mismatch_end, %while.body
157+
%len.addr = phi i32 [ %len, %mismatch_end ], [ %mismatch_result, %while.body ]
158+
%inc = add i32 %len.addr, 1
159+
%cmp.not = icmp eq i32 %mismatch_result, %n
160+
br i1 %cmp.not, label %while.end, label %while.body
161+
162+
while.body: ; preds = %while.cond
163+
%idxprom = zext i32 %mismatch_result to i64
164+
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
165+
%38 = load i8, ptr %arrayidx, align 1
166+
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
167+
%39 = load i8, ptr %arrayidx2, align 1
168+
%cmp.not2 = icmp eq i8 %38, %39
169+
br i1 %cmp.not2, label %while.cond, label %while.end
170+
171+
byte.compare: ; preds = %mismatch_end
172+
br label %while.end
173+
174+
while.end: ; preds = %byte.compare, %while.body, %while.cond
175+
%inc.lcssa = phi i32 [ %mismatch_result, %while.body ], [ %mismatch_result, %while.cond ], [ %mismatch_result, %byte.compare ]
176+
ret i32 %inc.lcssa
177+
}
178+

0 commit comments

Comments
 (0)