Skip to content

Commit b56e716

Browse files
committed
Add unit test to demonstrate behavior before the optimization.
In this example, the output is accumulated through a single chain of dependent instructions, so we do not yet leverage the processor's multiple NEON ports by using multiple accumulation registers.
1 parent cf69b4c commit b56e716

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown %s -o - | FileCheck %s
2+
3+
# A chain of UABAL instructions that can be reassociated for better ILP.
4+
# Before the optimization, we accumulate in a single long chain.
5+
# CHECK-LABEL: uabal_accumulation
6+
# CHECK: [[START:%.*]]:fpr128 = UABDLv4i16_v4i32
7+
# CHECK: [[A:%.*]]:fpr128 = UABALv4i16_v4i32 [[START]]
8+
# CHECK: [[B:%.*]]:fpr128 = UABALv4i16_v4i32 [[A]]
9+
# CHECK: [[C:%.*]]:fpr128 = UABALv4i16_v4i32 [[B]]
10+
# CHECK: [[D:%.*]]:fpr128 = UABALv4i16_v4i32 [[C]]
11+
# CHECK: [[E:%.*]]:fpr128 = UABALv4i16_v4i32 [[D]]
12+
# CHECK: [[F:%.*]]:fpr128 = UABALv4i16_v4i32 [[E]]
13+
# CHECK: [[G:%.*]]:fpr128 = UABALv4i16_v4i32 [[F]]
14+
# CHECK: [[H:%.*]]:fpr128 = UABALv4i16_v4i32 [[G]]
15+
# CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[H]]
16+
17+
# Function under test: uabal_accumulation.
# $x0-$x3 are copied into virtual registers; %0 and %2 serve as the two base
# addresses, and %1 and %3 are the values repeatedly added to them (ADDXrr) to
# form the address of each following pair of loads.
# Every step loads one 64-bit value from each address (LDRDui). The second
# pair of loads (%8, %9) feeds UABDLv4i16_v4i32, producing %10. The first pair
# (%4, %5) and the seven later pairs feed eight UABALv4i16_v4i32 instructions,
# each of which takes the previous result as its first operand:
#   %10 -> %11 -> %16 -> %21 -> %26 -> %31 -> %36 -> %41 -> %46
# so the accumulation forms a single serial dependency chain.
# ADDVv4i32v then consumes %46, and its fpr32 result is moved to $w0 and
# returned.
---
18+
name: uabal_accumulation
19+
body: |
20+
bb.0.entry:
21+
liveins: $x0, $x1, $x2, $x3
22+
23+
%3:gpr64 = COPY $x3
24+
%2:gpr64common = COPY $x2
25+
%1:gpr64 = COPY $x1
26+
%0:gpr64common = COPY $x0
27+
%4:fpr64 = LDRDui %0, 0 :: (load (s64))
28+
%5:fpr64 = LDRDui %2, 0 :: (load (s64))
29+
%6:gpr64common = ADDXrr %0, %1
30+
%7:gpr64common = ADDXrr %2, %3
31+
%8:fpr64 = LDRDui %6, 0 :: (load (s64))
32+
%9:fpr64 = LDRDui %7, 0 :: (load (s64))
33+
%10:fpr128 = UABDLv4i16_v4i32 killed %8, killed %9
34+
%11:fpr128 = UABALv4i16_v4i32 %10, killed %4, killed %5
35+
%12:gpr64common = ADDXrr %6, %1
36+
%13:gpr64common = ADDXrr %7, %3
37+
%14:fpr64 = LDRDui %12, 0 :: (load (s64))
38+
%15:fpr64 = LDRDui %13, 0 :: (load (s64))
39+
%16:fpr128 = UABALv4i16_v4i32 %11, killed %14, killed %15
40+
%17:gpr64common = ADDXrr %12, %1
41+
%18:gpr64common = ADDXrr %13, %3
42+
%19:fpr64 = LDRDui %17, 0 :: (load (s64))
43+
%20:fpr64 = LDRDui %18, 0 :: (load (s64))
44+
%21:fpr128 = UABALv4i16_v4i32 %16, killed %19, killed %20
45+
%22:gpr64common = ADDXrr %17, %1
46+
%23:gpr64common = ADDXrr %18, %3
47+
%24:fpr64 = LDRDui %22, 0 :: (load (s64))
48+
%25:fpr64 = LDRDui %23, 0 :: (load (s64))
49+
%26:fpr128 = UABALv4i16_v4i32 %21, killed %24, killed %25
50+
%27:gpr64common = ADDXrr %22, %1
51+
%28:gpr64common = ADDXrr %23, %3
52+
%29:fpr64 = LDRDui %27, 0 :: (load (s64))
53+
%30:fpr64 = LDRDui %28, 0 :: (load (s64))
54+
%31:fpr128 = UABALv4i16_v4i32 %26, killed %29, killed %30
55+
%32:gpr64common = ADDXrr %27, %1
56+
%33:gpr64common = ADDXrr %28, %3
57+
%34:fpr64 = LDRDui %32, 0 :: (load (s64))
58+
%35:fpr64 = LDRDui %33, 0 :: (load (s64))
59+
%36:fpr128 = UABALv4i16_v4i32 %31, killed %34, killed %35
60+
%37:gpr64common = ADDXrr %32, %1
61+
%38:gpr64common = ADDXrr %33, %3
62+
%39:fpr64 = LDRDui %37, 0 :: (load (s64))
63+
%40:fpr64 = LDRDui %38, 0 :: (load (s64))
64+
%41:fpr128 = UABALv4i16_v4i32 %36, killed %39, killed %40
65+
%42:gpr64common = ADDXrr %37, %1
66+
%43:gpr64common = ADDXrr %38, %3
67+
%44:fpr64 = LDRDui %42, 0 :: (load (s64))
68+
%45:fpr64 = LDRDui %43, 0 :: (load (s64))
69+
%46:fpr128 = UABALv4i16_v4i32 %41, killed %44, killed %45
70+
%47:fpr32 = ADDVv4i32v killed %46
71+
%48:fpr128 = IMPLICIT_DEF
72+
%49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub
73+
%50:gpr32all = COPY %49.ssub
74+
$w0 = COPY %50
75+
RET_ReallyLR implicit $w0
76+
77+
...

0 commit comments

Comments
 (0)