|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s |
| 3 | + |
| 4 | +; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll |
| 5 | + |
; compare_bytes_simple(%a, %b, %len, %n) -> i32
;
; Scans the bytes of %a and %b starting at index %len + 1 and returns the
; index of the first position where they differ, or %n when the index
; reaches %n without finding a difference.
;
; Two search paths exist:
;   * mismatch_vec_loop - scalable-vector path built from VP intrinsics
;     (vp.load / vp.icmp ne / vp.cttz.elts) under an explicit vector length.
;   * mismatch_loop     - scalar byte-at-a-time fallback.
;
; The expected assembly below pins the vector path to
; vsetvli + vle8.v + vmsne.vv + vfirst.m, where a negative vfirst.m result is
; replaced by the vl returned from vsetvli before the "found" comparison.
;
; NOTE(review): the mismatch_* block structure presumably mirrors the output
; of the loop-idiom byte-compare transform named in the file header - confirm.
| 6 | +define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) { |
| 7 | +; CHECK-LABEL: compare_bytes_simple: |
| 8 | +; CHECK: # %bb.0: # %entry |
| 9 | +; CHECK-NEXT: sext.w a4, a3 |
| 10 | +; CHECK-NEXT: addiw a5, a2, 1 |
| 11 | +; CHECK-NEXT: bltu a4, a5, .LBB0_7 |
| 12 | +; CHECK-NEXT: # %bb.1: # %mismatch_mem_check |
| 13 | +; CHECK-NEXT: slli a2, a5, 32 |
| 14 | +; CHECK-NEXT: srli a2, a2, 32 |
| 15 | +; CHECK-NEXT: slli a6, a3, 32 |
| 16 | +; CHECK-NEXT: srli a6, a6, 32 |
| 17 | +; CHECK-NEXT: add a7, a0, a2 |
| 18 | +; CHECK-NEXT: add t0, a0, a6 |
| 19 | +; CHECK-NEXT: srli a7, a7, 12 |
| 20 | +; CHECK-NEXT: srli t0, t0, 12 |
| 21 | +; CHECK-NEXT: bne a7, t0, .LBB0_7 |
| 22 | +; CHECK-NEXT: # %bb.2: # %mismatch_mem_check |
| 23 | +; CHECK-NEXT: add a7, a1, a2 |
| 24 | +; CHECK-NEXT: add t0, a1, a6 |
| 25 | +; CHECK-NEXT: srli a7, a7, 12 |
| 26 | +; CHECK-NEXT: srli t0, t0, 12 |
| 27 | +; CHECK-NEXT: bne a7, t0, .LBB0_7 |
| 28 | +; CHECK-NEXT: .LBB0_3: # %mismatch_vec_loop |
| 29 | +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| 30 | +; CHECK-NEXT: sub a4, a6, a2 |
| 31 | +; CHECK-NEXT: vsetvli a4, a4, e8, m2, ta, ma |
| 32 | +; CHECK-NEXT: add a5, a0, a2 |
| 33 | +; CHECK-NEXT: vle8.v v8, (a5) |
| 34 | +; CHECK-NEXT: add a5, a1, a2 |
| 35 | +; CHECK-NEXT: vle8.v v10, (a5) |
| 36 | +; CHECK-NEXT: vmsne.vv v12, v8, v10 |
| 37 | +; CHECK-NEXT: vfirst.m a7, v12 |
| 38 | +; CHECK-NEXT: mv a5, a4 |
| 39 | +; CHECK-NEXT: bltz a7, .LBB0_5 |
| 40 | +; CHECK-NEXT: # %bb.4: # %mismatch_vec_loop |
| 41 | +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| 42 | +; CHECK-NEXT: mv a5, a7 |
| 43 | +; CHECK-NEXT: .LBB0_5: # %mismatch_vec_loop |
| 44 | +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| 45 | +; CHECK-NEXT: sext.w a7, a5 |
| 46 | +; CHECK-NEXT: bne a7, a4, .LBB0_11 |
| 47 | +; CHECK-NEXT: # %bb.6: # %mismatch_vec_loop_inc |
| 48 | +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 |
| 49 | +; CHECK-NEXT: add a2, a2, a4 |
| 50 | +; CHECK-NEXT: bne a2, a6, .LBB0_3 |
| 51 | +; CHECK-NEXT: j .LBB0_9 |
| 52 | +; CHECK-NEXT: .LBB0_7: # %mismatch_loop |
| 53 | +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| 54 | +; CHECK-NEXT: slli a2, a5, 32 |
| 55 | +; CHECK-NEXT: srli a2, a2, 32 |
| 56 | +; CHECK-NEXT: add a6, a0, a2 |
| 57 | +; CHECK-NEXT: lbu a6, 0(a6) |
| 58 | +; CHECK-NEXT: add a2, a1, a2 |
| 59 | +; CHECK-NEXT: lbu a2, 0(a2) |
| 60 | +; CHECK-NEXT: bne a6, a2, .LBB0_10 |
| 61 | +; CHECK-NEXT: # %bb.8: # %mismatch_loop_inc |
| 62 | +; CHECK-NEXT: # in Loop: Header=BB0_7 Depth=1 |
| 63 | +; CHECK-NEXT: addiw a5, a5, 1 |
| 64 | +; CHECK-NEXT: bne a4, a5, .LBB0_7 |
| 65 | +; CHECK-NEXT: .LBB0_9: # %while.end |
| 66 | +; CHECK-NEXT: mv a0, a3 |
| 67 | +; CHECK-NEXT: ret |
| 68 | +; CHECK-NEXT: .LBB0_10: |
| 69 | +; CHECK-NEXT: mv a0, a5 |
| 70 | +; CHECK-NEXT: ret |
| 71 | +; CHECK-NEXT: .LBB0_11: # %mismatch_vec_loop_found |
| 72 | +; CHECK-NEXT: slli a5, a5, 32 |
| 73 | +; CHECK-NEXT: srli a3, a5, 32 |
| 74 | +; CHECK-NEXT: add a0, a2, a3 |
| 75 | +; CHECK-NEXT: ret |
; %0 = %len + 1 is the first index examined by either search path.
| 76 | +entry: |
| 77 | + %0 = add i32 %len, 1 |
| 78 | + br label %mismatch_min_it_check |
| 79 | + |
; Take the vector-capable path only when the start index is <= %n (unsigned);
; otherwise go straight to the scalar loop. %1 / %2 are the zero-extended
; start index and end index used for 64-bit address arithmetic.
| 80 | +mismatch_min_it_check: ; preds = %entry |
| 81 | + %1 = zext i32 %0 to i64 |
| 82 | + %2 = zext i32 %n to i64 |
| 83 | + %3 = icmp ule i32 %0, %n |
| 84 | + br i1 %3, label %mismatch_mem_check, label %mismatch_loop_pre |
| 85 | + |
; For each pointer, compare the address bits above bit 11 of (base + start)
; and (base + %n). If either pair differs, fall back to the scalar loop.
; NOTE(review): shifting by 12 presumably tests for crossing a 4 KiB page so
; the vector loads cannot touch a different page - confirm.
| 86 | +mismatch_mem_check: ; preds = %mismatch_min_it_check |
| 87 | + %4 = getelementptr i8, ptr %a, i64 %1 |
| 88 | + %5 = getelementptr i8, ptr %b, i64 %1 |
| 89 | + %6 = ptrtoint ptr %5 to i64 |
| 90 | + %7 = ptrtoint ptr %4 to i64 |
| 91 | + %8 = getelementptr i8, ptr %a, i64 %2 |
| 92 | + %9 = getelementptr i8, ptr %b, i64 %2 |
| 93 | + %10 = ptrtoint ptr %8 to i64 |
| 94 | + %11 = ptrtoint ptr %9 to i64 |
| 95 | + %12 = lshr i64 %7, 12 |
| 96 | + %13 = lshr i64 %10, 12 |
| 97 | + %14 = lshr i64 %6, 12 |
| 98 | + %15 = lshr i64 %11, 12 |
| 99 | + %16 = icmp ne i64 %12, %13 |
| 100 | + %17 = icmp ne i64 %14, %15 |
| 101 | + %18 = or i1 %16, %17 |
| 102 | + br i1 %18, label %mismatch_loop_pre, label %mismatch_vec_loop_preheader |
| 103 | + |
| 104 | +mismatch_vec_loop_preheader: ; preds = %mismatch_mem_check |
| 105 | + br label %mismatch_vec_loop |
| 106 | + |
; Vector search loop. Each iteration:
;   %avl = remaining bytes (%n - current index)
;   %19  = element count for this iteration from get.vector.length
;          (requested VF 16, scalable)
;   load %19 bytes from each buffer, compare for inequality, then count the
;   trailing zero elements of the compare mask.
; All VP operations use an all-true splat mask and %19 as the vector length.
; A count (%22) different from %19 is treated as "mismatch found".
| 107 | +mismatch_vec_loop: ; preds = %mismatch_vec_loop_inc, %mismatch_vec_loop_preheader |
| 108 | + %mismatch_vector_index = phi i64 [ %1, %mismatch_vec_loop_preheader ], [ %25, %mismatch_vec_loop_inc ] |
| 109 | + %avl = sub nuw nsw i64 %2, %mismatch_vector_index |
| 110 | + %19 = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 16, i1 true) |
| 111 | + %20 = getelementptr inbounds i8, ptr %a, i64 %mismatch_vector_index |
| 112 | + %lhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %20, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19) |
| 113 | + %21 = getelementptr inbounds i8, ptr %b, i64 %mismatch_vector_index |
| 114 | + %rhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %21, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19) |
| 115 | + %mismatch.cmp = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> %lhs.load, <vscale x 16 x i8> %rhs.load, metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19) |
| 116 | + %22 = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %mismatch.cmp, i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19) |
| 117 | + %23 = icmp ne i32 %22, %19 |
| 118 | + br i1 %23, label %mismatch_vec_loop_found, label %mismatch_vec_loop_inc |
| 119 | + |
; No mismatch in this chunk: advance the index by the number of elements
; processed and stop once it equals %n (result is then %n).
| 120 | +mismatch_vec_loop_inc: ; preds = %mismatch_vec_loop |
| 121 | + %24 = zext i32 %19 to i64 |
| 122 | + %25 = add nuw nsw i64 %mismatch_vector_index, %24 |
| 123 | + %26 = icmp ne i64 %25, %2 |
| 124 | + br i1 %26, label %mismatch_vec_loop, label %mismatch_end |
| 125 | + |
; Mismatch inside the current chunk: result = chunk start index + trailing
; zero count, truncated back to i32.
| 126 | +mismatch_vec_loop_found: ; preds = %mismatch_vec_loop |
| 127 | + %ctz = phi i32 [ %22, %mismatch_vec_loop ] |
| 128 | + %mismatch_vector_index1 = phi i64 [ %mismatch_vector_index, %mismatch_vec_loop ] |
| 129 | + %27 = zext i32 %ctz to i64 |
| 130 | + %28 = add nuw nsw i64 %mismatch_vector_index1, %27 |
| 131 | + %29 = trunc i64 %28 to i32 |
| 132 | + br label %mismatch_end |
| 133 | + |
| 134 | +mismatch_loop_pre: ; preds = %mismatch_mem_check, %mismatch_min_it_check |
| 135 | + br label %mismatch_loop |
| 136 | + |
; Scalar fallback: compare one byte per iteration starting at %0. A differing
; byte exits with the current index as the result.
| 137 | +mismatch_loop: ; preds = %mismatch_loop_inc, %mismatch_loop_pre |
| 138 | + %mismatch_index = phi i32 [ %0, %mismatch_loop_pre ], [ %36, %mismatch_loop_inc ] |
| 139 | + %30 = zext i32 %mismatch_index to i64 |
| 140 | + %31 = getelementptr inbounds i8, ptr %a, i64 %30 |
| 141 | + %32 = load i8, ptr %31, align 1 |
| 142 | + %33 = getelementptr inbounds i8, ptr %b, i64 %30 |
| 143 | + %34 = load i8, ptr %33, align 1 |
| 144 | + %35 = icmp eq i8 %32, %34 |
| 145 | + br i1 %35, label %mismatch_loop_inc, label %mismatch_end |
| 146 | + |
; Bytes matched: step to the next index; reaching %n ends the search with
; result %n.
| 147 | +mismatch_loop_inc: ; preds = %mismatch_loop |
| 148 | + %36 = add i32 %mismatch_index, 1 |
| 149 | + %37 = icmp eq i32 %36, %n |
| 150 | + br i1 %37, label %mismatch_end, label %mismatch_loop |
| 151 | + |
; Merge point for both search paths. The branch condition is the constant
; true, so control always continues to byte.compare; while.cond / while.body
; below are only reachable through the never-taken false edge.
| 152 | +mismatch_end: ; preds = %mismatch_loop_inc, %mismatch_loop, %mismatch_vec_loop_found, %mismatch_vec_loop_inc |
| 153 | + %mismatch_result = phi i32 [ %n, %mismatch_loop_inc ], [ %mismatch_index, %mismatch_loop ], [ %n, %mismatch_vec_loop_inc ], [ %29, %mismatch_vec_loop_found ] |
| 154 | + br i1 true, label %byte.compare, label %while.cond |
| 155 | + |
; NOTE(review): while.cond / while.body look like the remains of the original
; scalar while-loop kept alongside the mismatch_* blocks - confirm.
| 156 | +while.cond: ; preds = %mismatch_end, %while.body |
| 157 | + %len.addr = phi i32 [ %len, %mismatch_end ], [ %mismatch_result, %while.body ] |
| 158 | + %inc = add i32 %len.addr, 1 |
| 159 | + %cmp.not = icmp eq i32 %mismatch_result, %n |
| 160 | + br i1 %cmp.not, label %while.end, label %while.body |
| 161 | + |
| 162 | +while.body: ; preds = %while.cond |
| 163 | + %idxprom = zext i32 %mismatch_result to i64 |
| 164 | + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom |
| 165 | + %38 = load i8, ptr %arrayidx, align 1 |
| 166 | + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom |
| 167 | + %39 = load i8, ptr %arrayidx2, align 1 |
| 168 | + %cmp.not2 = icmp eq i8 %38, %39 |
| 169 | + br i1 %cmp.not2, label %while.cond, label %while.end |
| 170 | + |
| 171 | +byte.compare: ; preds = %mismatch_end |
| 172 | + br label %while.end |
| 173 | + |
; Every incoming edge carries %mismatch_result, so the function returns the
; index computed by the mismatch search.
| 174 | +while.end: ; preds = %byte.compare, %while.body, %while.cond |
| 175 | + %inc.lcssa = phi i32 [ %mismatch_result, %while.body ], [ %mismatch_result, %while.cond ], [ %mismatch_result, %byte.compare ] |
| 176 | + ret i32 %inc.lcssa |
| 177 | +} |
| 178 | + |
0 commit comments