|
| 1 | +# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown %s -o - | FileCheck %s |
| 2 | + |
| 3 | +# A serial chain of UABAL accumulations that could be reassociated for better ILP. |
| 4 | +# Baseline (before the optimization): the CHECK lines below expect a single long dependent chain. |
| 5 | +# CHECK-LABEL: uabal_accumulation |
| 6 | +# CHECK: [[START:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| 7 | +# CHECK: [[A:%.*]]:fpr128 = UABALv4i16_v4i32 [[START]] |
| 8 | +# CHECK: [[B:%.*]]:fpr128 = UABALv4i16_v4i32 [[A]] |
| 9 | +# CHECK: [[C:%.*]]:fpr128 = UABALv4i16_v4i32 [[B]] |
| 10 | +# CHECK: [[D:%.*]]:fpr128 = UABALv4i16_v4i32 [[C]] |
| 11 | +# CHECK: [[E:%.*]]:fpr128 = UABALv4i16_v4i32 [[D]] |
| 12 | +# CHECK: [[F:%.*]]:fpr128 = UABALv4i16_v4i32 [[E]] |
| 13 | +# CHECK: [[G:%.*]]:fpr128 = UABALv4i16_v4i32 [[F]] |
| 14 | +# CHECK: [[H:%.*]]:fpr128 = UABALv4i16_v4i32 [[G]] |
| 15 | +# CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[H]] |
| 16 | + |
| 17 | +--- |
| 18 | +name: uabal_accumulation |
| 19 | +body: | |
| 20 | + bb.0.entry: |
| 21 | + liveins: $x0, $x1, $x2, $x3 |
| 22 | + |
| 23 | + %3:gpr64 = COPY $x3 |
| 24 | + %2:gpr64common = COPY $x2 |
| 25 | + %1:gpr64 = COPY $x1 |
| 26 | + %0:gpr64common = COPY $x0 |
| 27 | + %4:fpr64 = LDRDui %0, 0 :: (load (s64)) |
| 28 | + %5:fpr64 = LDRDui %2, 0 :: (load (s64)) |
| 29 | + %6:gpr64common = ADDXrr %0, %1 |
| 30 | + %7:gpr64common = ADDXrr %2, %3 |
| 31 | + %8:fpr64 = LDRDui %6, 0 :: (load (s64)) |
| 32 | + %9:fpr64 = LDRDui %7, 0 :: (load (s64)) |
| 33 | + %10:fpr128 = UABDLv4i16_v4i32 killed %8, killed %9 |
| 34 | + %11:fpr128 = UABALv4i16_v4i32 %10, killed %4, killed %5 |
| 35 | + %12:gpr64common = ADDXrr %6, %1 |
| 36 | + %13:gpr64common = ADDXrr %7, %3 |
| 37 | + %14:fpr64 = LDRDui %12, 0 :: (load (s64)) |
| 38 | + %15:fpr64 = LDRDui %13, 0 :: (load (s64)) |
| 39 | + %16:fpr128 = UABALv4i16_v4i32 %11, killed %14, killed %15 |
| 40 | + %17:gpr64common = ADDXrr %12, %1 |
| 41 | + %18:gpr64common = ADDXrr %13, %3 |
| 42 | + %19:fpr64 = LDRDui %17, 0 :: (load (s64)) |
| 43 | + %20:fpr64 = LDRDui %18, 0 :: (load (s64)) |
| 44 | + %21:fpr128 = UABALv4i16_v4i32 %16, killed %19, killed %20 |
| 45 | + %22:gpr64common = ADDXrr %17, %1 |
| 46 | + %23:gpr64common = ADDXrr %18, %3 |
| 47 | + %24:fpr64 = LDRDui %22, 0 :: (load (s64)) |
| 48 | + %25:fpr64 = LDRDui %23, 0 :: (load (s64)) |
| 49 | + %26:fpr128 = UABALv4i16_v4i32 %21, killed %24, killed %25 |
| 50 | + %27:gpr64common = ADDXrr %22, %1 |
| 51 | + %28:gpr64common = ADDXrr %23, %3 |
| 52 | + %29:fpr64 = LDRDui %27, 0 :: (load (s64)) |
| 53 | + %30:fpr64 = LDRDui %28, 0 :: (load (s64)) |
| 54 | + %31:fpr128 = UABALv4i16_v4i32 %26, killed %29, killed %30 |
| 55 | + %32:gpr64common = ADDXrr %27, %1 |
| 56 | + %33:gpr64common = ADDXrr %28, %3 |
| 57 | + %34:fpr64 = LDRDui %32, 0 :: (load (s64)) |
| 58 | + %35:fpr64 = LDRDui %33, 0 :: (load (s64)) |
| 59 | + %36:fpr128 = UABALv4i16_v4i32 %31, killed %34, killed %35 |
| 60 | + %37:gpr64common = ADDXrr %32, %1 |
| 61 | + %38:gpr64common = ADDXrr %33, %3 |
| 62 | + %39:fpr64 = LDRDui %37, 0 :: (load (s64)) |
| 63 | + %40:fpr64 = LDRDui %38, 0 :: (load (s64)) |
| 64 | + %41:fpr128 = UABALv4i16_v4i32 %36, killed %39, killed %40 |
| 65 | + %42:gpr64common = ADDXrr %37, %1 |
| 66 | + %43:gpr64common = ADDXrr %38, %3 |
| 67 | + %44:fpr64 = LDRDui %42, 0 :: (load (s64)) |
| 68 | + %45:fpr64 = LDRDui %43, 0 :: (load (s64)) |
| 69 | + %46:fpr128 = UABALv4i16_v4i32 %41, killed %44, killed %45 |
| 70 | + %47:fpr32 = ADDVv4i32v killed %46 |
| 71 | + %48:fpr128 = IMPLICIT_DEF |
| 72 | + %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub |
| 73 | + %50:gpr32all = COPY %49.ssub |
| 74 | + $w0 = COPY %50 |
| 75 | + RET_ReallyLR implicit $w0 |
| 76 | +
|
| 77 | +... |
0 commit comments