; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s

; Test that the pipeliner schedules the store instructions correctly. Since
; there is a dependence between the stores, they cannot be scheduled further
; than MII cycles/instructions apart. That is, the first store cannot occur
; multiple times before the second store in the schedule.
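;
; In the loop below, the first store writes 2 through a pointer derived from %1
; and the second writes 8 through a pointer derived from %2. Neither %1 nor %2
; carries noalias, so the two stores may alias; that memory dependence is
; (presumably) what forces the pipeliner to keep them within MII of each other.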
define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
; CHECK-LABEL: comp_method:
; CHECK: # %bb.0:
; CHECK-NEXT: extsw 7, 8
; CHECK-NEXT: extsw 8, 9
; CHECK-NEXT: clrldi 9, 6, 32
; CHECK-NEXT: addi 6, 3, -1
; CHECK-NEXT: mtctr 9
; CHECK-NEXT: li 11, 0
; CHECK-NEXT: sradi 12, 11, 2
; CHECK-NEXT: add 5, 5, 8
; CHECK-NEXT: li 8, 2
; CHECK-NEXT: li 3, 8
; CHECK-NEXT: addi 11, 7, 0
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: lbzu 9, 1(6)
; CHECK-NEXT: add 12, 12, 10
; CHECK-NEXT: extsb 9, 9
; CHECK-NEXT: stbx 8, 4, 9
; CHECK-NEXT: add 9, 9, 12
; CHECK-NEXT: bdz .LBB0_2
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: lbzu 0, 1(6)
; CHECK-NEXT: sradi 12, 11, 2
; CHECK-NEXT: add 11, 11, 7
; CHECK-NEXT: add 12, 12, 10
; CHECK-NEXT: sldi 30, 9, 2
; CHECK-NEXT: add 9, 9, 30
; CHECK-NEXT: extsb 0, 0
; CHECK-NEXT: stbx 3, 5, 9
; CHECK-NEXT: add 9, 0, 12
; CHECK-NEXT: stbx 8, 4, 0
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: sldi 4, 9, 2
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: add 4, 9, 4
; CHECK-NEXT: stbx 3, 5, 4
; CHECK-NEXT: blr
  %8 = icmp sgt i32 %3, 64
  tail call void @llvm.assume(i1 %8)
  %9 = and i32 %3, 1
  %10 = icmp eq i32 %9, 0
  tail call void @llvm.assume(i1 %10)
  %11 = sext i32 %5 to i64
  %12 = sext i32 %6 to i64
  %13 = zext nneg i32 %3 to i64
  %14 = getelementptr i8, ptr %2, i64 %12
  br label %16

15:
  ret void

16:
  %17 = phi i64 [ 0, %7 ], [ %24, %16 ]
  %18 = getelementptr inbounds i8, ptr %0, i64 %17
  %19 = load i8, ptr %18, align 1
  %20 = sext i8 %19 to i64
  %21 = getelementptr inbounds i8, ptr %1, i64 %20
  store i8 2, ptr %21, align 1
  %22 = mul nsw i64 %17, %11
  %a1 = ashr i64 %22, 2
  %a2 = add i64 %a1, %v1
  %a3 = add i64 %20, %a2
  %a4 = mul nsw i64 %a3, 5
  %23 = getelementptr i8, ptr %14, i64 %a4
  store i8 8, ptr %23, align 1
  %24 = add nuw nsw i64 %17, 1
  %25 = icmp eq i64 %24, %13
  br i1 %25, label %15, label %16
}

declare void @llvm.assume(i1 noundef) #1

attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }