8
8
define dso_local void @run_test () local_unnamed_addr uwtable {
9
9
; CHECK-LABEL: run_test:
10
10
; CHECK: // %bb.0: // %entry
11
- ; CHECK-NEXT: sub sp, sp, #192
12
- ; CHECK-NEXT: .cfi_def_cfa_offset 192
11
+ ; CHECK-NEXT: sub sp, sp, #208
12
+ ; CHECK-NEXT: .cfi_def_cfa_offset 208
13
13
; CHECK-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill
14
14
; CHECK-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill
15
15
; CHECK-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill
16
16
; CHECK-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill
17
- ; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
18
- ; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
17
+ ; CHECK-NEXT: str x23, [sp, #160] // 8-byte Folded Spill
18
+ ; CHECK-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
19
+ ; CHECK-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
19
20
; CHECK-NEXT: .cfi_offset w19, -8
20
21
; CHECK-NEXT: .cfi_offset w20, -16
21
22
; CHECK-NEXT: .cfi_offset w21, -24
22
23
; CHECK-NEXT: .cfi_offset w22, -32
23
- ; CHECK-NEXT: .cfi_offset b8, -40
24
- ; CHECK-NEXT: .cfi_offset b9, -48
25
- ; CHECK-NEXT: .cfi_offset b10, -56
26
- ; CHECK-NEXT: .cfi_offset b11, -64
27
- ; CHECK-NEXT: .cfi_offset b12, -72
28
- ; CHECK-NEXT: .cfi_offset b13, -80
29
- ; CHECK-NEXT: .cfi_offset b14, -88
30
- ; CHECK-NEXT: .cfi_offset b15, -96
24
+ ; CHECK-NEXT: .cfi_offset w23, -48
25
+ ; CHECK-NEXT: .cfi_offset b8, -56
26
+ ; CHECK-NEXT: .cfi_offset b9, -64
27
+ ; CHECK-NEXT: .cfi_offset b10, -72
28
+ ; CHECK-NEXT: .cfi_offset b11, -80
29
+ ; CHECK-NEXT: .cfi_offset b12, -88
30
+ ; CHECK-NEXT: .cfi_offset b13, -96
31
+ ; CHECK-NEXT: .cfi_offset b14, -104
32
+ ; CHECK-NEXT: .cfi_offset b15, -112
31
33
; CHECK-NEXT: movi v2.2d, #0000000000000000
32
34
; CHECK-NEXT: // implicit-def: $q1
33
35
; CHECK-NEXT: mov x8, xzr
34
- ; CHECK-NEXT: mov x9, xzr
35
- ; CHECK-NEXT: adrp x10, B+48
36
- ; CHECK-NEXT: add x10, x10, :lo12:B+48
37
- ; CHECK-NEXT: adrp x11, A
38
- ; CHECK-NEXT: add x11, x11, :lo12:A
36
+ ; CHECK-NEXT: adrp x9, B+48
37
+ ; CHECK-NEXT: add x9, x9, :lo12: B+48
38
+ ; CHECK-NEXT: adrp x10, A
39
+ ; CHECK-NEXT: add x10, x10, :lo12: A
40
+ ; CHECK-NEXT: mov x11, xzr
39
41
; CHECK-NEXT: // kill: killed $q1
40
42
; CHECK-NEXT: // implicit-def: $q1
43
+ ; CHECK-NEXT: mov x12, xzr
41
44
; CHECK-NEXT: // implicit-def: $q0
42
45
; CHECK-NEXT: // implicit-def: $q3
43
46
; CHECK-NEXT: // implicit-def: $q4
@@ -69,103 +72,102 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
69
72
; CHECK-NEXT: // kill: killed $q1
70
73
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
71
74
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
72
- ; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
73
- ; CHECK-NEXT: ldr q14, [x8]
74
- ; CHECK-NEXT: mov x12, xzr
75
- ; CHECK-NEXT: ldr x14, [x12]
76
75
; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill
77
- ; CHECK-NEXT: add x19, x11, x8
78
- ; CHECK-NEXT: fmov x15, d14
79
- ; CHECK-NEXT: mov x16, v14.d[1]
80
- ; CHECK-NEXT: ldr q15, [x12]
81
- ; CHECK-NEXT: ldr q14, [x10], #64
76
+ ; CHECK-NEXT: ldr q15, [x8]
77
+ ; CHECK-NEXT: ldr x15, [x8]
78
+ ; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
79
+ ; CHECK-NEXT: add x20, x10, x11
82
80
; CHECK-NEXT: mov v8.16b, v28.16b
83
- ; CHECK-NEXT: fmov x13, d15
84
- ; CHECK-NEXT: mov x18, v15.d[1]
81
+ ; CHECK-NEXT: fmov x2, d15
82
+ ; CHECK-NEXT: mov x17, v15.d[1]
83
+ ; CHECK-NEXT: ldr q14, [x8]
85
84
; CHECK-NEXT: mov v28.16b, v24.16b
86
- ; CHECK-NEXT: mul x17, x15, x14
87
- ; CHECK-NEXT: mov x12, v14.d[1]
88
- ; CHECK-NEXT: fmov x4, d14
89
85
; CHECK-NEXT: mov v24.16b, v20.16b
90
86
; CHECK-NEXT: mov v20.16b, v17.16b
87
+ ; CHECK-NEXT: fmov x13, d14
88
+ ; CHECK-NEXT: mov x16, v14.d[1]
91
89
; CHECK-NEXT: mov v17.16b, v5.16b
92
- ; CHECK-NEXT: mul x1, x16, x14
90
+ ; CHECK-NEXT: mul x3, x2, x15
91
+ ; CHECK-NEXT: ldr q14, [x9], #64
93
92
; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload
94
- ; CHECK-NEXT: ldr x5, [x8]
95
- ; CHECK-NEXT: ldr x19, [x19, #128]
93
+ ; CHECK-NEXT: ldr x6, [x8]
94
+ ; CHECK-NEXT: ldr x20, [x20, #128]
95
+ ; CHECK-NEXT: mul x1, x17, x15
96
+ ; CHECK-NEXT: mov x14, v14.d[1]
97
+ ; CHECK-NEXT: fmov x5, d14
96
98
; CHECK-NEXT: mov v29.16b, v21.16b
97
99
; CHECK-NEXT: mov v21.16b, v0.16b
98
- ; CHECK-NEXT: mul x0, x13, x14
99
100
; CHECK-NEXT: mov v25.16b, v6.16b
101
+ ; CHECK-NEXT: mul x18, x13, x15
100
102
; CHECK-NEXT: mov v6.16b, v2.16b
101
- ; CHECK-NEXT: fmov d15, x17
102
103
; CHECK-NEXT: mov v26.16b, v22.16b
104
+ ; CHECK-NEXT: fmov d15, x3
103
105
; CHECK-NEXT: mov v22.16b, v18.16b
104
- ; CHECK-NEXT: mul x2, x18, x14
105
106
; CHECK-NEXT: mov v18.16b, v7.16b
107
+ ; CHECK-NEXT: mul x0, x16, x15
106
108
; CHECK-NEXT: mov v7.16b, v3.16b
107
109
; CHECK-NEXT: mov v16.16b, v4.16b
108
- ; CHECK-NEXT: add x8, x8 , #8
109
- ; CHECK-NEXT: add x9, x9 , #1
110
+ ; CHECK-NEXT: add x11, x11 , #8
111
+ ; CHECK-NEXT: add x12, x12 , #1
110
112
; CHECK-NEXT: mov v15.d[1], x1
111
- ; CHECK-NEXT: mul x3, x12, x14
112
- ; CHECK-NEXT: cmp x8 , #64
113
- ; CHECK-NEXT: fmov d14, x0
114
- ; CHECK-NEXT: mul x14, x4, x14
113
+ ; CHECK-NEXT: mul x4, x14, x15
114
+ ; CHECK-NEXT: cmp x11 , #64
115
+ ; CHECK-NEXT: fmov d14, x18
116
+ ; CHECK-NEXT: mul x15, x5, x15
115
117
; CHECK-NEXT: add v5.2d, v5.2d, v15.2d
116
- ; CHECK-NEXT: mul x20, x15, x5
117
- ; CHECK-NEXT: mov v14.d[1], x2
118
- ; CHECK-NEXT: mul x15, x15, x19
119
- ; CHECK-NEXT: fmov d0, x14
118
+ ; CHECK-NEXT: mul x21, x2, x6
119
+ ; CHECK-NEXT: mov v14.d[1], x0
120
+ ; CHECK-NEXT: mul x2, x2, x20
121
+ ; CHECK-NEXT: fmov d0, x15
120
122
; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill
121
123
; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload
122
- ; CHECK-NEXT: mul x21 , x13, x19
124
+ ; CHECK-NEXT: mul x22 , x13, x20
123
125
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
124
- ; CHECK-NEXT: fmov d3, x20
125
- ; CHECK-NEXT: mul x7, x16, x5
126
- ; CHECK-NEXT: mov v0.d[1], x3
127
- ; CHECK-NEXT: fmov d1, x15
128
- ; CHECK-NEXT: mul x16, x16, x19
126
+ ; CHECK-NEXT: fmov d3, x21
127
+ ; CHECK-NEXT: mul x19, x17, x6
128
+ ; CHECK-NEXT: mov v0.d[1], x4
129
+ ; CHECK-NEXT: fmov d1, x2
130
+ ; CHECK-NEXT: mul x17, x17, x20
129
131
; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill
130
132
; CHECK-NEXT: add v5.2d, v13.2d, v14.2d
131
- ; CHECK-NEXT: fmov d2, x21
133
+ ; CHECK-NEXT: fmov d2, x22
132
134
; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload
133
- ; CHECK-NEXT: mul x6, x18, x5
135
+ ; CHECK-NEXT: mul x7, x16, x6
134
136
; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload
135
- ; CHECK-NEXT: mov v3.d[1], x7
137
+ ; CHECK-NEXT: mov v3.d[1], x19
136
138
; CHECK-NEXT: add v13.2d, v13.2d, v0.2d
137
- ; CHECK-NEXT: mul x18, x18, x19
138
- ; CHECK-NEXT: mov v1.d[1], x16
139
- ; CHECK-NEXT: mul x22, x4, x19
139
+ ; CHECK-NEXT: mul x16, x16, x20
140
+ ; CHECK-NEXT: mov v1.d[1], x17
141
+ ; CHECK-NEXT: mul x23, x5, x20
140
142
; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill
141
143
; CHECK-NEXT: mov v13.16b, v5.16b
142
144
; CHECK-NEXT: mov v5.16b, v17.16b
143
145
; CHECK-NEXT: mov v17.16b, v20.16b
144
146
; CHECK-NEXT: mov v20.16b, v24.16b
145
- ; CHECK-NEXT: mul x13, x13, x5
147
+ ; CHECK-NEXT: mul x13, x13, x6
146
148
; CHECK-NEXT: mov v24.16b, v28.16b
147
149
; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
148
- ; CHECK-NEXT: mov v2.d[1], x18
150
+ ; CHECK-NEXT: mov v2.d[1], x16
149
151
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
150
152
; CHECK-NEXT: add v27.2d, v27.2d, v3.2d
151
- ; CHECK-NEXT: mul x17, x12, x19
153
+ ; CHECK-NEXT: mul x18, x14, x20
152
154
; CHECK-NEXT: add v23.2d, v23.2d, v3.2d
153
155
; CHECK-NEXT: add v19.2d, v19.2d, v3.2d
154
- ; CHECK-NEXT: fmov d4, x22
156
+ ; CHECK-NEXT: fmov d4, x23
155
157
; CHECK-NEXT: add v10.2d, v10.2d, v3.2d
156
- ; CHECK-NEXT: mul x14, x4, x5
158
+ ; CHECK-NEXT: mul x15, x5, x6
157
159
; CHECK-NEXT: fmov d0, x13
158
160
; CHECK-NEXT: add v14.2d, v14.2d, v2.2d
159
161
; CHECK-NEXT: add v2.2d, v6.2d, v3.2d
160
- ; CHECK-NEXT: mul x12, x12, x5
162
+ ; CHECK-NEXT: mul x14, x14, x6
161
163
; CHECK-NEXT: mov v3.16b, v7.16b
162
164
; CHECK-NEXT: mov v7.16b, v18.16b
163
- ; CHECK-NEXT: mov v4.d[1], x17
165
+ ; CHECK-NEXT: mov v4.d[1], x18
164
166
; CHECK-NEXT: mov v18.16b, v22.16b
165
- ; CHECK-NEXT: mov v0.d[1], x6
166
- ; CHECK-NEXT: fmov d1, x14
167
+ ; CHECK-NEXT: mov v0.d[1], x7
168
+ ; CHECK-NEXT: fmov d1, x15
167
169
; CHECK-NEXT: add v28.2d, v8.2d, v4.2d
168
- ; CHECK-NEXT: mov v1.d[1], x12
170
+ ; CHECK-NEXT: mov v1.d[1], x14
169
171
; CHECK-NEXT: add v31.2d, v31.2d, v0.2d
170
172
; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
171
173
; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
@@ -192,11 +194,12 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
192
194
; CHECK-NEXT: adrp x8, C
193
195
; CHECK-NEXT: add x8, x8, :lo12:C
194
196
; CHECK-NEXT: stp q11, q30, [x8, #80]
195
- ; CHECK-NEXT: ldp x20, x19, [sp, #176 ] // 16-byte Folded Reload
197
+ ; CHECK-NEXT: ldp x20, x19, [sp, #192 ] // 16-byte Folded Reload
196
198
; CHECK-NEXT: str q1, [x8]
197
199
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
200
+ ; CHECK-NEXT: ldr x23, [sp, #160] // 8-byte Folded Reload
198
201
; CHECK-NEXT: stp q15, q14, [x8, #144]
199
- ; CHECK-NEXT: ldp x22, x21, [sp, #160 ] // 16-byte Folded Reload
202
+ ; CHECK-NEXT: ldp x22, x21, [sp, #176 ] // 16-byte Folded Reload
200
203
; CHECK-NEXT: stp q1, q13, [x8, #16]
201
204
; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
202
205
; CHECK-NEXT: stp q28, q12, [x8, #176]
@@ -216,12 +219,13 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
216
219
; CHECK-NEXT: stp q5, q4, [x8, #432]
217
220
; CHECK-NEXT: stp q2, q3, [x8, #464]
218
221
; CHECK-NEXT: str q0, [x8, #496]
219
- ; CHECK-NEXT: add sp, sp, #192
222
+ ; CHECK-NEXT: add sp, sp, #208
220
223
; CHECK-NEXT: .cfi_def_cfa_offset 0
221
224
; CHECK-NEXT: .cfi_restore w19
222
225
; CHECK-NEXT: .cfi_restore w20
223
226
; CHECK-NEXT: .cfi_restore w21
224
227
; CHECK-NEXT: .cfi_restore w22
228
+ ; CHECK-NEXT: .cfi_restore w23
225
229
; CHECK-NEXT: .cfi_restore b8
226
230
; CHECK-NEXT: .cfi_restore b9
227
231
; CHECK-NEXT: .cfi_restore b10
0 commit comments