|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s |
| 3 | + |
| 4 | +; This previously crashed when spilling a GPR: when we removed a dead ADDI, |
| 5 | +; we did not also remove it from the LiveIntervals (LIS) instruction map. |
| 6 | + |
| 7 | +define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) { |
| 8 | +; CHECK-LABEL: main: |
| 9 | +; CHECK: # %bb.0: # %entry |
| 10 | +; CHECK-NEXT: addi sp, sp, -112 |
| 11 | +; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| 12 | +; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill |
| 13 | +; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill |
| 14 | +; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill |
| 15 | +; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill |
| 16 | +; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill |
| 17 | +; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill |
| 18 | +; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill |
| 19 | +; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill |
| 20 | +; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill |
| 21 | +; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill |
| 22 | +; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill |
| 23 | +; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill |
| 24 | +; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill |
| 25 | +; CHECK-NEXT: .cfi_offset ra, -8 |
| 26 | +; CHECK-NEXT: .cfi_offset s0, -16 |
| 27 | +; CHECK-NEXT: .cfi_offset s1, -24 |
| 28 | +; CHECK-NEXT: .cfi_offset s2, -32 |
| 29 | +; CHECK-NEXT: .cfi_offset s3, -40 |
| 30 | +; CHECK-NEXT: .cfi_offset s4, -48 |
| 31 | +; CHECK-NEXT: .cfi_offset s5, -56 |
| 32 | +; CHECK-NEXT: .cfi_offset s6, -64 |
| 33 | +; CHECK-NEXT: .cfi_offset s7, -72 |
| 34 | +; CHECK-NEXT: .cfi_offset s8, -80 |
| 35 | +; CHECK-NEXT: .cfi_offset s9, -88 |
| 36 | +; CHECK-NEXT: .cfi_offset s10, -96 |
| 37 | +; CHECK-NEXT: .cfi_offset s11, -104 |
| 38 | +; CHECK-NEXT: li a1, 0 |
| 39 | +; CHECK-NEXT: li a3, 8 |
| 40 | +; CHECK-NEXT: li a5, 12 |
| 41 | +; CHECK-NEXT: li a6, 4 |
| 42 | +; CHECK-NEXT: li a7, 20 |
| 43 | +; CHECK-NEXT: ld s0, 112(sp) |
| 44 | +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma |
| 45 | +; CHECK-NEXT: vmv.v.i v8, 0 |
| 46 | +; CHECK-NEXT: andi a4, a4, 1 |
| 47 | +; CHECK-NEXT: li t0, 4 |
| 48 | +; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i |
| 49 | +; CHECK-NEXT: # =>This Loop Header: Depth=1 |
| 50 | +; CHECK-NEXT: # Child Loop BB0_2 Depth 2 |
| 51 | +; CHECK-NEXT: # Child Loop BB0_3 Depth 3 |
| 52 | +; CHECK-NEXT: # Child Loop BB0_4 Depth 4 |
| 53 | +; CHECK-NEXT: # Child Loop BB0_5 Depth 5 |
| 54 | +; CHECK-NEXT: mv t1, a7 |
| 55 | +; CHECK-NEXT: mv t2, t0 |
| 56 | +; CHECK-NEXT: mv t3, a5 |
| 57 | +; CHECK-NEXT: mv t4, a3 |
| 58 | +; CHECK-NEXT: mv t5, a1 |
| 59 | +; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i |
| 60 | +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 |
| 61 | +; CHECK-NEXT: # => This Loop Header: Depth=2 |
| 62 | +; CHECK-NEXT: # Child Loop BB0_3 Depth 3 |
| 63 | +; CHECK-NEXT: # Child Loop BB0_4 Depth 4 |
| 64 | +; CHECK-NEXT: # Child Loop BB0_5 Depth 5 |
| 65 | +; CHECK-NEXT: mv t6, t1 |
| 66 | +; CHECK-NEXT: mv s1, t2 |
| 67 | +; CHECK-NEXT: mv s2, t3 |
| 68 | +; CHECK-NEXT: mv s3, t4 |
| 69 | +; CHECK-NEXT: mv s4, t5 |
| 70 | +; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i |
| 71 | +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 |
| 72 | +; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 |
| 73 | +; CHECK-NEXT: # => This Loop Header: Depth=3 |
| 74 | +; CHECK-NEXT: # Child Loop BB0_4 Depth 4 |
| 75 | +; CHECK-NEXT: # Child Loop BB0_5 Depth 5 |
| 76 | +; CHECK-NEXT: mv s6, t6 |
| 77 | +; CHECK-NEXT: mv s7, s1 |
| 78 | +; CHECK-NEXT: mv s8, s2 |
| 79 | +; CHECK-NEXT: mv s9, s3 |
| 80 | +; CHECK-NEXT: mv s10, s4 |
| 81 | +; CHECK-NEXT: .LBB0_4: # %vector.ph.i |
| 82 | +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 |
| 83 | +; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 |
| 84 | +; CHECK-NEXT: # Parent Loop BB0_3 Depth=3 |
| 85 | +; CHECK-NEXT: # => This Loop Header: Depth=4 |
| 86 | +; CHECK-NEXT: # Child Loop BB0_5 Depth 5 |
| 87 | +; CHECK-NEXT: li s5, 0 |
| 88 | +; CHECK-NEXT: .LBB0_5: # %vector.body.i |
| 89 | +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 |
| 90 | +; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 |
| 91 | +; CHECK-NEXT: # Parent Loop BB0_3 Depth=3 |
| 92 | +; CHECK-NEXT: # Parent Loop BB0_4 Depth=4 |
| 93 | +; CHECK-NEXT: # => This Inner Loop Header: Depth=5 |
| 94 | +; CHECK-NEXT: addi s11, s5, 4 |
| 95 | +; CHECK-NEXT: add ra, s9, s5 |
| 96 | +; CHECK-NEXT: vse32.v v8, (ra), v0.t |
| 97 | +; CHECK-NEXT: add s5, s7, s5 |
| 98 | +; CHECK-NEXT: vse32.v v8, (s5), v0.t |
| 99 | +; CHECK-NEXT: mv s5, s11 |
| 100 | +; CHECK-NEXT: bne s11, a6, .LBB0_5 |
| 101 | +; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i |
| 102 | +; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4 |
| 103 | +; CHECK-NEXT: addi s10, s10, 4 |
| 104 | +; CHECK-NEXT: addi s9, s9, 4 |
| 105 | +; CHECK-NEXT: addi s8, s8, 4 |
| 106 | +; CHECK-NEXT: addi s7, s7, 4 |
| 107 | +; CHECK-NEXT: andi s5, a0, 1 |
| 108 | +; CHECK-NEXT: addi s6, s6, 4 |
| 109 | +; CHECK-NEXT: beqz s5, .LBB0_4 |
| 110 | +; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i |
| 111 | +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3 |
| 112 | +; CHECK-NEXT: addi s4, s4, 4 |
| 113 | +; CHECK-NEXT: addi s3, s3, 4 |
| 114 | +; CHECK-NEXT: addi s2, s2, 4 |
| 115 | +; CHECK-NEXT: addi s1, s1, 4 |
| 116 | +; CHECK-NEXT: andi s6, a2, 1 |
| 117 | +; CHECK-NEXT: addi t6, t6, 4 |
| 118 | +; CHECK-NEXT: beqz s6, .LBB0_3 |
| 119 | +; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i |
| 120 | +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2 |
| 121 | +; CHECK-NEXT: addi t5, t5, 4 |
| 122 | +; CHECK-NEXT: addi t4, t4, 4 |
| 123 | +; CHECK-NEXT: addi t3, t3, 4 |
| 124 | +; CHECK-NEXT: addi t2, t2, 4 |
| 125 | +; CHECK-NEXT: addi t1, t1, 4 |
| 126 | +; CHECK-NEXT: beqz a4, .LBB0_2 |
| 127 | +; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i |
| 128 | +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 |
| 129 | +; CHECK-NEXT: addi a1, a1, 4 |
| 130 | +; CHECK-NEXT: addi a3, a3, 4 |
| 131 | +; CHECK-NEXT: addi a5, a5, 4 |
| 132 | +; CHECK-NEXT: addi t0, t0, 4 |
| 133 | +; CHECK-NEXT: addi a7, a7, 4 |
| 134 | +; CHECK-NEXT: beqz s6, .LBB0_1 |
| 135 | +; CHECK-NEXT: # %bb.10: # %l.exit |
| 136 | +; CHECK-NEXT: li a0, 0 |
| 137 | +; CHECK-NEXT: jalr a0 |
| 138 | +; CHECK-NEXT: beqz s5, .LBB0_12 |
| 139 | +; CHECK-NEXT: .LBB0_11: # %for.body7.us.14 |
| 140 | +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| 141 | +; CHECK-NEXT: j .LBB0_11 |
| 142 | +; CHECK-NEXT: .LBB0_12: # %for.body7.us.19 |
| 143 | +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| 144 | +; CHECK-NEXT: vmv.s.x v8, s0 |
| 145 | +; CHECK-NEXT: vmv.v.i v16, 0 |
| 146 | +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma |
| 147 | +; CHECK-NEXT: vslideup.vi v16, v8, 1 |
| 148 | +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| 149 | +; CHECK-NEXT: vmsne.vi v8, v16, 0 |
| 150 | +; CHECK-NEXT: vmv.x.s a0, v8 |
| 151 | +; CHECK-NEXT: snez a0, a0 |
| 152 | +; CHECK-NEXT: sb a0, 0(zero) |
| 153 | +; CHECK-NEXT: li a0, 0 |
| 154 | +; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload |
| 155 | +; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload |
| 156 | +; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload |
| 157 | +; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload |
| 158 | +; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload |
| 159 | +; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload |
| 160 | +; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload |
| 161 | +; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload |
| 162 | +; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload |
| 163 | +; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload |
| 164 | +; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload |
| 165 | +; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload |
| 166 | +; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload |
| 167 | +; CHECK-NEXT: addi sp, sp, 112 |
| 168 | +; CHECK-NEXT: ret |
| 169 | +entry: |
| 170 | + %0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64() ; start value of the %vec.ind.i phi |
| 171 | + br label %for.cond1.preheader.i |
| 172 | + |
| 173 | +for.cond1.preheader.i: ; preds = %for.cond.cleanup3.i, %entry ; loop header, depth 1 (outermost) |
| 174 | + %arg.21 = phi i64 [ 0, %entry ], [ %indvars.iv.next74.i, %for.cond.cleanup3.i ] |
| 175 | + br label %for.cond5.preheader.i |
| 176 | + |
| 177 | +for.cond5.preheader.i: ; preds = %for.cond.cleanup7.i, %for.cond1.preheader.i ; loop header, depth 2 |
| 178 | + %arg.42 = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next70.i, %for.cond.cleanup7.i ] |
| 179 | + %1 = add i64 %arg.42, %arg.21 |
| 180 | + br label %for.cond9.preheader.i |
| 181 | + |
| 182 | +for.cond.cleanup3.i: ; preds = %for.cond.cleanup7.i |
| 183 | + %indvars.iv.next74.i = add i64 %arg.21, 1 |
| 184 | + br i1 %arg.3, label %l.exit, label %for.cond1.preheader.i |
| 185 | + |
| 186 | +for.cond9.preheader.i: ; preds = %for.cond.cleanup11.i, %for.cond5.preheader.i ; loop header, depth 3 |
| 187 | + %arg.74 = phi i64 [ 0, %for.cond5.preheader.i ], [ %indvars.iv.next66.i, %for.cond.cleanup11.i ] |
| 188 | + %2 = add i64 %1, %arg.74 |
| 189 | + br label %vector.ph.i |
| 190 | + |
| 191 | +for.cond.cleanup7.i: ; preds = %for.cond.cleanup11.i |
| 192 | + %indvars.iv.next70.i = add i64 %arg.42, 1 |
| 193 | + br i1 %arg.5, label %for.cond.cleanup3.i, label %for.cond5.preheader.i |
| 194 | + |
| 195 | +vector.ph.i: ; preds = %for.cond.cleanup15.i, %for.cond9.preheader.i ; loop header, depth 4 |
| 196 | + %arg.96 = phi i64 [ 0, %for.cond9.preheader.i ], [ %indvars.iv.next62.i, %for.cond.cleanup15.i ] |
| 197 | + %3 = add i64 %2, %arg.96 |
| 198 | + %broadcast.splatinsert.i = insertelement <vscale x 4 x i64> zeroinitializer, i64 %3, i64 0 |
| 199 | + %broadcast.splat.i = shufflevector <vscale x 4 x i64> %broadcast.splatinsert.i, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer |
| 200 | + br label %vector.body.i |
| 201 | + |
| 202 | +vector.body.i: ; preds = %vector.body.i, %vector.ph.i ; innermost loop, depth 5 |
| 203 | + %index.i = phi i64 [ 0, %vector.ph.i ], [ %index.next.i, %vector.body.i ] |
| 204 | + %vec.ind.i = phi <vscale x 4 x i64> [ %0, %vector.ph.i ], [ %6, %vector.body.i ] |
| 205 | + %4 = add <vscale x 4 x i64> %vec.ind.i, %broadcast.splat.i |
| 206 | + %5 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %4 |
| 207 | + tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %5, i32 4, <vscale x 4 x i1> zeroinitializer) ; constant all-false mask |
| 208 | + %6 = add <vscale x 4 x i64> %vec.ind.i, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) |
| 209 | + %7 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %6 |
| 210 | + tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %7, i32 4, <vscale x 4 x i1> zeroinitializer) ; constant all-false mask |
| 211 | + %arg.100 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) |
| 212 | + %arg.101 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.100 |
| 213 | + tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.101, i32 4, <vscale x 4 x i1> %arg.6) ; masked by %arg.6; presumably one of the two v0.t-masked vse32.v stores in the CHECK lines |
| 214 | + %arg.102 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) |
| 215 | + %arg.103 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.102 |
| 216 | + tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.103, i32 4, <vscale x 4 x i1> zeroinitializer) ; constant all-false mask |
| 217 | + %arg.104 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) |
| 218 | + %arg.105 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.104 |
| 219 | + tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.105, i32 4, <vscale x 4 x i1> %arg.6) ; masked by %arg.6; presumably the other v0.t-masked vse32.v store |
| 220 | + %arg.106 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 5, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) |
| 221 | + %arg.107 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.106 |
| 222 | + tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.107, i32 4, <vscale x 4 x i1> zeroinitializer) ; constant all-false mask |
| 223 | + %index.next.i = add i64 %index.i, 1 |
| 224 | + %arg.108 = icmp eq i64 %index.i, 0 |
| 225 | + br i1 %arg.108, label %for.cond.cleanup15.i, label %vector.body.i |
| 226 | + |
| 227 | +for.cond.cleanup11.i: ; preds = %for.cond.cleanup15.i |
| 228 | + %indvars.iv.next66.i = add i64 %arg.74, 1 |
| 229 | + br i1 %arg.3, label %for.cond.cleanup7.i, label %for.cond9.preheader.i |
| 230 | + |
| 231 | +for.cond.cleanup15.i: ; preds = %vector.body.i |
| 232 | + %indvars.iv.next62.i = add i64 %arg.96, 1 |
| 233 | + br i1 %arg.1, label %for.cond.cleanup11.i, label %vector.ph.i |
| 234 | + |
| 235 | +l.exit: ; preds = %for.cond.cleanup3.i |
| 236 | + tail call void null() ; call through a null function pointer (li a0, 0 / jalr a0 in the CHECK lines) |
| 237 | + br i1 %arg.1, label %for.body7.us.14, label %for.body7.us.19 |
| 238 | + |
| 239 | +for.body7.us.14: ; preds = %for.body7.us.14, %l.exit |
| 240 | + br label %for.body7.us.14 ; unconditional branch to itself: infinite loop |
| 241 | + |
| 242 | +for.body7.us.19: ; preds = %l.exit |
| 243 | + %arg.109 = insertelement <32 x i32> zeroinitializer, i32 %arg.10, i64 1 |
| 244 | + %8 = icmp ne <32 x i32> %arg.109, zeroinitializer |
| 245 | + %9 = bitcast <32 x i1> %8 to i32 |
| 246 | + %op.rdx13 = icmp ne i32 %9, 0 |
| 247 | + %op.rdx = zext i1 %op.rdx13 to i8 |
| 248 | + store i8 %op.rdx, ptr null, align 1 |
| 249 | + ret i32 0 |
| 250 | +} |
| 251 | + |
| 252 | +uselistorder i64 0, { 0, 1, 13, 2, 3, 14, 4, 15, 5, 16, 6, 7, 8, 9, 10, 11, 12 } ; explicit use-list orders follow; presumably needed to reproduce the original crash -- TODO confirm |
| 253 | +uselistorder i64 1, { 0, 1, 2, 3, 6, 4, 5 } |
| 254 | +uselistorder ptr @llvm.masked.scatter.nxv4i32.nxv4p0, { 5, 4, 3, 2, 1, 0 } |
0 commit comments