@@ -117,3 +117,115 @@ entry:
117
117
musttail call void @sret_callee (ptr sret ({ double , double }) align 8 %result )
118
118
ret void
119
119
}
120
+
121
; Clang only uses byval for arguments of 65 bytes or larger, but these tests
; use a 20-byte struct to keep them readable. That size is still large enough
; for the argument to be split between registers and the stack, so all of the
; interesting code paths in the backend are exercised.
%twenty_bytes = type { [5 x i32] }

; External callee taking the 20-byte struct by value. The pointer is spelled
; with the opaque `ptr` type; the pointee type is carried by the byval
; attribute.
declare void @large_callee(ptr byval(%twenty_bytes) align 4)
127
+
128
; Functions with byval parameters can be tail-called, because the value is
; passed in registers and on the stack in the same way for the caller and the
; callee. Within @large_caller the first 16 bytes of the argument are spilled
; to the local stack frame, but for the tail call they are passed in r0-r3, so
; it is safe to de-allocate that memory before the call. Most of the code
; generated for this isn't needed, but that is a missed optimisation, not a
; correctness issue.
;
; NOTE(review): the assertions below look autogenerated (presumably by
; update_llc_test_checks.py); confirm against the file header before editing
; them by hand.
define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add r12, r12, #16
; CHECK-NEXT: ldr r4, [r12], #4
; CHECK-NEXT: str r4, [lr], #4
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: b large_callee
entry:
  ; Forward the incoming byval argument unchanged to the callee.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
156
+
157
; Same shape as @large_caller, but with some inline asm that clobbers r0-r3
; ahead of the call, to test that the part of the byval argument passed in
; those registers is re-loaded before the tail call.
;
; NOTE(review): the assertions below look autogenerated (presumably by
; update_llc_test_checks.py); confirm against the file header before editing
; them by hand.
define void @large_caller_check_regs(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_check_regs:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r3, sp, #8
; CHECK-NEXT: add r0, sp, #8
; CHECK-NEXT: add r12, r0, #16
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
; CHECK-NEXT: ldr r4, [r12], #4
; CHECK-NEXT: str r4, [lr], #4
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: b large_callee
entry:
  ; Empty asm body whose only effect is to mark r0-r3 as clobbered.
  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
  ; Forward the incoming byval argument unchanged to the callee.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
185
+
186
; The IR for this one looks dodgy, because it passes an alloca to a musttail
; call. It is valid, though: the alloca is passed as a byval argument, so its
; contents are copied into the stack space allocated by
; @large_caller_new_value's caller.
;
; NOTE(review): the assertions below look autogenerated (presumably by
; update_llc_test_checks.py); confirm against the file header before editing
; them by hand.
define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_new_value:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #36
; CHECK-NEXT: sub sp, sp, #36
; CHECK-NEXT: add r12, sp, #20
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
; CHECK-NEXT: mov r0, #4
; CHECK-NEXT: add r1, sp, #36
; CHECK-NEXT: str r0, [sp, #16]
; CHECK-NEXT: mov r0, #3
; CHECK-NEXT: str r0, [sp, #12]
; CHECK-NEXT: mov r0, #2
; CHECK-NEXT: str r0, [sp, #8]
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: str r0, [sp, #4]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: str r0, [sp]
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: add r0, r0, #16
; CHECK-NEXT: mov r3, #3
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: str r2, [r1], #4
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r1, #1
; CHECK-NEXT: mov r2, #2
; CHECK-NEXT: add sp, sp, #36
; CHECK-NEXT: b large_callee
entry:
  ; Build a fresh 20-byte value in a local alloca: the i32 at byte offset 4*i
  ; is set to i, for i = 0..4.
  %y = alloca %twenty_bytes, align 4
  store i32 0, ptr %y, align 4
  %0 = getelementptr inbounds i8, ptr %y, i32 4
  store i32 1, ptr %0, align 4
  %1 = getelementptr inbounds i8, ptr %y, i32 8
  store i32 2, ptr %1, align 4
  %2 = getelementptr inbounds i8, ptr %y, i32 12
  store i32 3, ptr %2, align 4
  %3 = getelementptr inbounds i8, ptr %y, i32 16
  store i32 4, ptr %3, align 4
  ; Pass the new value (not the incoming %a) as the byval argument.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y)
  ret void
}
0 commit comments