; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
3 | 3 |
|
4 | 4 |
|
5 | 5 | ; Make sure we don't emit a pair of shift for the zext in the preheader. We
|
@@ -127,3 +127,113 @@ for.body: ; preds = %for.body, %for.body
|
127 | 127 | %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
|
128 | 128 | br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
|
129 | 129 | }
|
| 130 | + |
; Any-of reduction carried as an <vscale x 4 x i1> recurrence: each iteration
; ORs the element-wise compare into the accumulator and folds it through
; llvm.vp.merge under the active vector length; the scalar result is produced
; once after the loop with vector.reduce.or.  The loop steps through the
; buffer in EVL-sized chunks obtained from
; llvm.experimental.get.vector.length.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them, do not hand-edit.
define i1 @widen_anyof_rdx(ptr %p, i64 %n) {
; CHECK-LABEL: widen_anyof_rdx:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, zero, e64, m4, ta, ma
; CHECK-NEXT: vmclr.m v12
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: .LBB2_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub a3, a1, a2
; CHECK-NEXT: slli a4, a2, 2
; CHECK-NEXT: vsetvli a3, a3, e8, mf2, ta, ma
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: vle32.v v14, (a4)
; CHECK-NEXT: vsetvli a4, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.v.x v16, a3
; CHECK-NEXT: vmsleu.vv v13, v16, v8
; CHECK-NEXT: vmsltu.vx v16, v8, a3
; CHECK-NEXT: vmand.mm v13, v12, v13
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmsne.vi v17, v14, 0
; CHECK-NEXT: vmor.mm v12, v12, v17
; CHECK-NEXT: vmand.mm v12, v12, v16
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: vmor.mm v12, v12, v13
; CHECK-NEXT: blt a2, a1, .LBB2_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: vcpop.m a0, v12
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: ret
entry:
  br label %loop
loop:
  ; %phi is the i1 accumulator recurrence; %rec feeds it back each iteration.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
  ; Remaining trip count, clamped to a legal EVL for a <vscale x 4 x ...> VF.
  %avl = sub i64 %n, %iv
  %evl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)

  %gep = getelementptr i32, ptr %p, i64 %iv
  %x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %cmp = icmp ne <vscale x 4 x i32> %x, zeroinitializer
  ; OR the new lanes into the accumulator, then keep only the first %evl
  ; lanes of the result; tail lanes retain the old accumulator value (%phi).
  %or = or <vscale x 4 x i1> %phi, %cmp
  %rec = call <vscale x 4 x i1> @llvm.vp.merge(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %or, <vscale x 4 x i1> %phi, i32 %evl)

  ; Advance by the number of elements actually processed this iteration.
  %evl.zext = zext i32 %evl to i64
  %iv.next = add i64 %iv, %evl.zext
  %done = icmp sge i64 %iv.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ; Scalarize the any-of result once, outside the loop.
  %res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
  ret i1 %res
}
| 183 | + |
| 184 | + |
; Same any-of recurrence as @widen_anyof_rdx, except the merged accumulator
; %rec is also stored back to memory inside the loop (the extra vsm.v in the
; CHECK body), i.e. the recurrence has a use in the loop besides the phi and
; the final reduction.
; NOTE(review): the CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them, do not hand-edit.
define i1 @widen_anyof_rdx_use_in_loop(ptr %p, i64 %n) {
; CHECK-LABEL: widen_anyof_rdx_use_in_loop:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, zero, e64, m4, ta, ma
; CHECK-NEXT: vmclr.m v12
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: .LBB3_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub a3, a1, a2
; CHECK-NEXT: slli a4, a2, 2
; CHECK-NEXT: vsetvli a3, a3, e8, mf2, ta, ma
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: vle32.v v14, (a4)
; CHECK-NEXT: vsetvli a5, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.v.x v16, a3
; CHECK-NEXT: vmsleu.vv v13, v16, v8
; CHECK-NEXT: vmsltu.vx v16, v8, a3
; CHECK-NEXT: vmand.mm v13, v12, v13
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmsne.vi v17, v14, 0
; CHECK-NEXT: vmor.mm v12, v12, v17
; CHECK-NEXT: vmand.mm v12, v12, v16
; CHECK-NEXT: vmor.mm v12, v12, v13
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: vsm.v v12, (a4)
; CHECK-NEXT: blt a2, a1, .LBB3_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: vcpop.m a0, v12
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: ret
entry:
  br label %loop
loop:
  ; %phi is the i1 accumulator recurrence; %rec feeds it back each iteration.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
  ; Remaining trip count, clamped to a legal EVL for a <vscale x 4 x ...> VF.
  %avl = sub i64 %n, %iv
  %evl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)

  %gep = getelementptr i32, ptr %p, i64 %iv
  %x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %cmp = icmp ne <vscale x 4 x i32> %x, zeroinitializer
  ; OR the new lanes into the accumulator, then keep only the first %evl
  ; lanes of the result; tail lanes retain the old accumulator value (%phi).
  %or = or <vscale x 4 x i1> %phi, %cmp
  %rec = call <vscale x 4 x i1> @llvm.vp.merge(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %or, <vscale x 4 x i1> %phi, i32 %evl)

  ; Extra in-loop use of the recurrence: persist the merged mask each trip.
  store <vscale x 4 x i1> %rec, ptr %gep

  ; Advance by the number of elements actually processed this iteration.
  %evl.zext = zext i32 %evl to i64
  %iv.next = add i64 %iv, %evl.zext
  %done = icmp sge i64 %iv.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ; Scalarize the any-of result once, outside the loop.
  %res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
  ret i1 %res
}
0 commit comments