# RUN: llc --verify-machineinstrs -mtriple=aarch64 -run-pass=pipeliner -o - %s -aarch64-enable-pipeliner -pipeliner-enable-copytophi=1
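# Note: there are no CHECK lines, so presumably this is a regression test that
# the MachinePipeliner (with copy-to-phi expansion enabled) handles this SVE
# predicated vector loop without crashing or tripping -verify-machineinstrs.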

--- |
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

  @glb = internal unnamed_addr global { [256 x i32], [256 x i32], [256 x i32] } zeroinitializer

  ; Function Attrs: nounwind vscale_range(1,16)
  define internal void @f(i32 %0, i32 %1) #0 {
  entry:
    %reass.sub = sub i32 %1, %0
    %invariant.op = add i32 %0, 1
    %invariant.op3 = add i32 %0, 2
    %omp_loop.cmp5.not = icmp eq i32 %reass.sub, -1
    br i1 %omp_loop.cmp5.not, label %exit, label %preheader

  preheader:                                        ; preds = %entry
    %2 = add i32 %1, 1
    %3 = icmp slt i32 %2, %invariant.op
    br i1 %3, label %body.preheader, label %vector.ph

  body.preheader:                                   ; preds = %preheader
    %4 = add i32 %1, 1
    %5 = sub i32 %4, %0
    br label %body

  vector.ph:                                        ; preds = %preheader
    %6 = add i32 %1, 1
    %7 = sub i32 %6, %0
    %8 = tail call i32 @llvm.vscale.i32()
    %9 = shl nuw nsw i32 %8, 2
    %10 = tail call i32 @llvm.vscale.i32()
    %11 = shl nuw nsw i32 %10, 2
    %12 = call i32 @llvm.usub.sat.i32(i32 %7, i32 %11)
    %active.lane.mask.entry = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %7)
    %13 = tail call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
    %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %9, i64 0
    %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
    %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %invariant.op, i64 0
    %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
    %broadcast.splatinsert7 = insertelement <vscale x 4 x i32> poison, i32 %invariant.op3, i64 0
    %broadcast.splat8 = shufflevector <vscale x 4 x i32> %broadcast.splatinsert7, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
    br label %vector.body

  vector.body:                                      ; preds = %vector.body, %vector.ph
    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    %active.lane.mask = phi <vscale x 4 x i1> [ %active.lane.mask.entry, %vector.ph ], [ %active.lane.mask.next, %vector.body ]
    %vec.ind = phi <vscale x 4 x i32> [ %13, %vector.ph ], [ %vec.ind.next, %vector.body ]
    %14 = add <vscale x 4 x i32> %vec.ind, %broadcast.splat
    %15 = extractelement <vscale x 4 x i32> %14, i64 0
    %16 = sext i32 %15 to i64
    %17 = add nsw i64 %16, -1
    %18 = getelementptr i32, ptr @glb, i64 %17
    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %14, ptr %18, i32 4, <vscale x 4 x i1> %active.lane.mask)
    %19 = add <vscale x 4 x i32> %vec.ind, %broadcast.splat8
    %20 = mul <vscale x 4 x i32> %14, %19
    %21 = sdiv <vscale x 4 x i32> %20, splat (i32 2)
    %22 = getelementptr i32, ptr getelementptr inbounds nuw (i8, ptr @glb, i64 1024), i64 %17
    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %21, ptr %22, i32 4, <vscale x 4 x i1> %active.lane.mask)
    %23 = getelementptr i32, ptr getelementptr inbounds nuw (i8, ptr @glb, i64 2048), i64 %17
    %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %23, i32 4, <vscale x 4 x i1> %active.lane.mask, <vscale x 4 x i32> poison)
    %24 = add <vscale x 4 x i32> %wide.masked.load, %21
    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %24, ptr %23, i32 4, <vscale x 4 x i1> %active.lane.mask)
    %25 = tail call i32 @llvm.vscale.i32()
    %26 = shl nuw nsw i32 %25, 2
    %index.next = add i32 %index, %26
    %active.lane.mask.next = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 %index, i32 %12)
    %vec.ind.next = add <vscale x 4 x i32> %vec.ind, %.splat
    %27 = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
    br i1 %27, label %vector.body, label %exit

  exit:                                             ; preds = %vector.body, %body, %entry
    ret void

  body:                                             ; preds = %body.preheader, %body
    %lsr.iv2 = phi i32 [ %invariant.op3, %body.preheader ], [ %lsr.iv.next3, %body ]
    %lsr.iv = phi i32 [ %5, %body.preheader ], [ %lsr.iv.next, %body ]
    %28 = add i32 %lsr.iv2, -1
    %29 = sext i32 %28 to i64
    %30 = add nsw i64 %29, -1
    %31 = getelementptr i32, ptr @glb, i64 %30
    store i32 %28, ptr %31, align 4
    %32 = mul i32 %28, %lsr.iv2
    %33 = sdiv i32 %32, 2
    %34 = getelementptr i32, ptr getelementptr inbounds nuw (i8, ptr @glb, i64 1024), i64 %30
    store i32 %33, ptr %34, align 4
    %35 = getelementptr i32, ptr getelementptr inbounds nuw (i8, ptr @glb, i64 2048), i64 %30
    %36 = load i32, ptr %35, align 4
    %37 = add i32 %36, %33
    store i32 %37, ptr %35, align 4
    %lsr.iv.next = add i32 %lsr.iv, -1
    %lsr.iv.next3 = add i32 %lsr.iv2, 1
    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
    br i1 %exitcond.not, label %exit, label %body
  }

  ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
  declare <vscale x 4 x i32> @llvm.stepvector.nxv4i32() #1

  ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
  declare i32 @llvm.vscale.i32() #1

  ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
  declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32) #1

  ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
  declare void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32>, ptr captures(none), i32 immarg, <vscale x 4 x i1>) #2

  ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read)
  declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr captures(none), i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>) #3

  ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
  declare i32 @llvm.usub.sat.i32(i32, i32) #4

  attributes #0 = { nounwind vscale_range(1,16) "frame-pointer"="non-leaf" "target-cpu"="neoverse-v1" "target-features"="+sve" }
  attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
  attributes #2 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
  attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
  attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

...
---
name: f
tracksRegLiveness: true
body: |
  bb.0.entry:
    successors: %bb.5(0x30000000), %bb.1(0x50000000)
    liveins: $w0, $w1

    %20:gpr32common = COPY $w1
    %19:gpr32common = COPY $w0
    %21:gpr32common = SUBWrr %20, %19
    dead $wzr = ADDSWri %21, 1, 0, implicit-def $nzcv
    Bcc 0, %bb.5, implicit $nzcv
    B %bb.1

  bb.1.preheader:
    successors: %bb.2(0x40000000), %bb.3(0x40000000)

    %22:gpr32common = ADDWri %19, 1, 0
    %23:gpr32sp = ADDWri %19, 2, 0
    %25:gpr32common = ADDWri %20, 1, 0
    dead $wzr = SUBSWrr killed %25, %22, implicit-def $nzcv
    Bcc 10, %bb.3, implicit $nzcv
    B %bb.2

  bb.2.body.preheader:
    successors: %bb.6(0x80000000)

    %1:gpr32sp = COPY %23
    %55:gpr32sp = ADDWri %21, 1, 0
    %2:gpr32all = COPY %55
    %57:gpr64common = MOVaddr target-flags(aarch64-page) @glb, target-flags(aarch64-pageoff, aarch64-nc) @glb
    B %bb.6

  bb.3.vector.ph:
    successors: %bb.4(0x80000000)

    %29:gpr32common = ADDWri %21, 1, 0
    %30:gpr64 = CNTW_XPiI 31, 1, implicit $vg
    %31:gpr32common = COPY %30.sub_32
    %32:gpr32 = SUBSWrr %29, %31, implicit-def $nzcv
    %33:gpr32 = COPY $wzr
    %34:gpr32 = CSELWr %33, killed %32, 3, implicit $nzcv
    %4:ppr = WHILELO_PWW_S %33, %29, implicit-def dead $nzcv
    %5:zpr = INDEX_II_S 0, 1, implicit $vg
    %6:zpr = DUP_ZR_S %31
    %7:zpr = DUP_ZR_S %22
    %8:zpr = DUP_ZR_S %23
    %27:gpr32all = COPY %33
    %37:gpr64common = MOVaddr target-flags(aarch64-page) @glb, target-flags(aarch64-pageoff, aarch64-nc) @glb
    %39:gpr64common = MOVi64imm -1
    %41:ppr_3b = PTRUE_S 31, implicit $vg
    %44:gpr64common = MOVi64imm 255
    %45:gpr64common = MOVi64imm 511

  bb.4.vector.body:
    successors: %bb.4(0x7c000000), %bb.5(0x04000000)

    %9:gpr32 = PHI %27, %bb.3, %12, %bb.4
    %10:ppr_3b = PHI %4, %bb.3, %13, %bb.4
    %11:zpr = PHI %5, %bb.3, %14, %bb.4
    %35:zpr = ADD_ZZZ_S %11, %7
    %36:gpr32 = COPY %35.ssub
    %38:gpr64sp = ADDXrx %37, killed %36, 50
    ST1W %35, %10, %38, %39 :: (store unknown-size into %ir.18, align 4)
    %40:zpr = ADD_ZZZ_S %11, %8
    %42:zpr = MUL_ZPZZ_S_UNDEF %41, %35, killed %40
    %43:zpr = ASRD_ZPmI_S %41, %42, 1
    ST1W %43, %10, %38, %44 :: (store unknown-size into %ir.22, align 4)
    %46:zpr = LD1W %10, %38, %45 :: (load unknown-size from %ir.23, align 4)
    %47:zpr = ADD_ZZZ_S killed %46, %43
    ST1W killed %47, %10, %38, %45 :: (store unknown-size into %ir.23, align 4)
    %50:gpr32 = ADDWrr %9, %31
    %12:gpr32all = COPY %50
    %13:ppr = WHILELO_PWW_S %9, %34, implicit-def $nzcv
    %14:zpr = ADD_ZZZ_S %11, %6
    Bcc 4, %bb.4, implicit $nzcv
    B %bb.5

  bb.5.exit:
    RET_ReallyLR

  bb.6.body:
    successors: %bb.5(0x04000000), %bb.6(0x7c000000)

    %15:gpr32common = PHI %1, %bb.2, %18, %bb.6
    %16:gpr32sp = PHI %2, %bb.2, %17, %bb.6
    %56:gpr32common = SUBWri %15, 1, 0
    %58:gpr64sp = ADDXrx %57, %56, 50
    STURWi %56, %58, -4 :: (store (s32) into %ir.31)
    %59:gpr32 = MADDWrrr %56, %15, $wzr
    %60:gpr32 = ADDWrs %59, %59, 95
    %61:gpr32 = SBFMWri killed %60, 1, 31
    STRWui %61, %58, 255 :: (store (s32) into %ir.34)
    %62:gpr32 = LDRWui %58, 511 :: (load (s32) from %ir.35)
    %63:gpr32 = ADDWrr killed %62, %61
    STRWui killed %63, %58, 511 :: (store (s32) into %ir.35)
    %64:gpr32 = SUBSWri %16, 1, 0, implicit-def $nzcv
    %17:gpr32all = COPY %64
    %65:gpr32sp = ADDWri %15, 1, 0
    %18:gpr32all = COPY %65
    Bcc 0, %bb.5, implicit $nzcv
    B %bb.6

...