@@ -337,19 +337,18 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
; CHECK64-LABEL: csr_d8_allocd_framepointer:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #176
- ; CHECK64-NEXT: str d8, [sp, #80] // 8-byte Folded Spill
+ ; CHECK64-NEXT: stp d0, d8, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT: stp x29, x30, [sp, #152] // 16-byte Folded Spill
- ; CHECK64-NEXT: add x29, sp, #80
- ; CHECK64-NEXT: .cfi_def_cfa w29, 96
+ ; CHECK64-NEXT: add x29, sp, #152
+ ; CHECK64-NEXT: .cfi_def_cfa w29, 24
; CHECK64-NEXT: .cfi_offset w30, -16
; CHECK64-NEXT: .cfi_offset w29, -24
; CHECK64-NEXT: .cfi_offset b8, -96
; CHECK64-NEXT: //APP
; CHECK64-NEXT: //NO_APP
- ; CHECK64-NEXT: stur d0, [x29, #-8]
; CHECK64-NEXT: ldr x29, [sp, #152] // 8-byte Folded Reload
- ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: mov w0, wzr
+ ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: add sp, sp, #176
; CHECK64-NEXT: ret
;
@@ -358,17 +357,17 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
; CHECK1024-NEXT: sub sp, sp, #1056
; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
- ; CHECK1024-NEXT: mov x29, sp
+ ; CHECK1024-NEXT: add x29, sp, #1032
; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1040
- ; CHECK1024-NEXT: .cfi_def_cfa w29, 1056
+ ; CHECK1024-NEXT: .cfi_def_cfa w29, 24
; CHECK1024-NEXT: .cfi_offset w30, -16
; CHECK1024-NEXT: .cfi_offset w29, -24
; CHECK1024-NEXT: .cfi_offset b8, -1056
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: //APP
; CHECK1024-NEXT: //NO_APP
- ; CHECK1024-NEXT: stur d0, [x29, #-8]
+ ; CHECK1024-NEXT: str d0, [sp, #1032]
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
@@ -2893,8 +2892,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
- ; CHECK64-NEXT: mov x29, sp
- ; CHECK64-NEXT: .cfi_def_cfa w29, 176
+ ; CHECK64-NEXT: add x29, sp, #128
+ ; CHECK64-NEXT: .cfi_def_cfa w29, 48
; CHECK64-NEXT: .cfi_offset w19, -16
; CHECK64-NEXT: .cfi_offset w20, -24
; CHECK64-NEXT: .cfi_offset w30, -40
@@ -2913,11 +2912,11 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: mov w20, w0
; CHECK64-NEXT: msub x9, x8, x8, x9
; CHECK64-NEXT: mov sp, x9
- ; CHECK64-NEXT: stur x9, [x29, #-80]
- ; CHECK64-NEXT: sub x9, x29, #80
- ; CHECK64-NEXT: sturh wzr, [x29, #-70]
- ; CHECK64-NEXT: stur wzr, [x29, #-68]
- ; CHECK64-NEXT: sturh w8, [x29, #-72]
+ ; CHECK64-NEXT: stur x9, [x29, #-208]
+ ; CHECK64-NEXT: sub x9, x29, #208
+ ; CHECK64-NEXT: sturh wzr, [x29, #-198]
+ ; CHECK64-NEXT: stur wzr, [x29, #-196]
+ ; CHECK64-NEXT: sturh w8, [x29, #-200]
; CHECK64-NEXT: msr TPIDR2_EL0, x9
; CHECK64-NEXT: .cfi_offset vg, -32
; CHECK64-NEXT: smstop sm
@@ -2926,14 +2925,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: .cfi_restore vg
; CHECK64-NEXT: smstart za
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
- ; CHECK64-NEXT: sub x0, x29, #80
+ ; CHECK64-NEXT: sub x0, x29, #208
; CHECK64-NEXT: cbnz x8, .LBB33_2
; CHECK64-NEXT: // %bb.1: // %entry
; CHECK64-NEXT: bl __arm_tpidr2_restore
; CHECK64-NEXT: .LBB33_2: // %entry
; CHECK64-NEXT: mov w0, w20
; CHECK64-NEXT: msr TPIDR2_EL0, xzr
- ; CHECK64-NEXT: mov sp, x29
+ ; CHECK64-NEXT: sub sp, x29, #128
; CHECK64-NEXT: .cfi_def_cfa wsp, 176
; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 16-byte Folded Reload
; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload
@@ -2972,8 +2971,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
- ; CHECK1024-NEXT: mov x29, sp
- ; CHECK1024-NEXT: .cfi_def_cfa w29, 1136
+ ; CHECK1024-NEXT: add x29, sp, #1088
+ ; CHECK1024-NEXT: .cfi_def_cfa w29, 48
; CHECK1024-NEXT: .cfi_offset w19, -8
; CHECK1024-NEXT: .cfi_offset w20, -16
; CHECK1024-NEXT: .cfi_offset w28, -24
@@ -2993,14 +2992,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: mov w20, w0
; CHECK1024-NEXT: msub x9, x8, x8, x9
; CHECK1024-NEXT: mov sp, x9
- ; CHECK1024-NEXT: sub x10, x29, #784
+ ; CHECK1024-NEXT: sub x10, x29, #1872
; CHECK1024-NEXT: stur x9, [x10, #-256]
- ; CHECK1024-NEXT: sub x9, x29, #774
- ; CHECK1024-NEXT: sub x10, x29, #772
+ ; CHECK1024-NEXT: sub x9, x29, #1862
+ ; CHECK1024-NEXT: sub x10, x29, #1860
; CHECK1024-NEXT: sturh wzr, [x9, #-256]
- ; CHECK1024-NEXT: sub x9, x29, #1040
+ ; CHECK1024-NEXT: sub x9, x29, #2128
; CHECK1024-NEXT: stur wzr, [x10, #-256]
- ; CHECK1024-NEXT: sub x10, x29, #776
+ ; CHECK1024-NEXT: sub x10, x29, #1864
; CHECK1024-NEXT: sturh w8, [x10, #-256]
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
; CHECK1024-NEXT: .cfi_offset vg, -32
@@ -3010,14 +3009,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: .cfi_restore vg
; CHECK1024-NEXT: smstart za
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
- ; CHECK1024-NEXT: sub x0, x29, #1040
+ ; CHECK1024-NEXT: sub x0, x29, #2128
; CHECK1024-NEXT: cbnz x8, .LBB33_2
; CHECK1024-NEXT: // %bb.1: // %entry
; CHECK1024-NEXT: bl __arm_tpidr2_restore
; CHECK1024-NEXT: .LBB33_2: // %entry
; CHECK1024-NEXT: mov w0, w20
; CHECK1024-NEXT: msr TPIDR2_EL0, xzr
- ; CHECK1024-NEXT: mov sp, x29
+ ; CHECK1024-NEXT: sub sp, x29, #1088
; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136
; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
@@ -3049,3 +3048,109 @@ entry:
ret i32 %x
}
declare void @other()
+
+ declare void @bar(ptr noundef) "aarch64_pstate_sm_compatible"
+
+ define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+ ; CHECK0-LABEL: sve_stack_object_and_vla:
+ ; CHECK0: // %bb.0: // %entry
+ ; CHECK0-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+ ; CHECK0-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill
+ ; CHECK0-NEXT: mov x29, sp
+ ; CHECK0-NEXT: addvl sp, sp, #-1
+ ; CHECK0-NEXT: mov x19, sp
+ ; CHECK0-NEXT: .cfi_def_cfa w29, 32
+ ; CHECK0-NEXT: .cfi_offset w19, -8
+ ; CHECK0-NEXT: .cfi_offset w28, -16
+ ; CHECK0-NEXT: .cfi_offset w30, -24
+ ; CHECK0-NEXT: .cfi_offset w29, -32
+ ; CHECK0-NEXT: lsl x9, x0, #2
+ ; CHECK0-NEXT: mov x8, sp
+ ; CHECK0-NEXT: add x9, x9, #15
+ ; CHECK0-NEXT: and x9, x9, #0xfffffffffffffff0
+ ; CHECK0-NEXT: sub x0, x8, x9
+ ; CHECK0-NEXT: mov sp, x0
+ ; CHECK0-NEXT: mov z0.s, #0 // =0x0
+ ; CHECK0-NEXT: ptrue p0.s
+ ; CHECK0-NEXT: st1w { z0.s }, p0, [x29, #-1, mul vl]
+ ; CHECK0-NEXT: bl bar
+ ; CHECK0-NEXT: mov w0, wzr
+ ; CHECK0-NEXT: mov sp, x29
+ ; CHECK0-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
+ ; CHECK0-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+ ; CHECK0-NEXT: ret
+ ;
+ ; CHECK64-LABEL: sve_stack_object_and_vla:
+ ; CHECK64: // %bb.0: // %entry
+ ; CHECK64-NEXT: sub sp, sp, #96
+ ; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+ ; CHECK64-NEXT: add x29, sp, #64
+ ; CHECK64-NEXT: stp x28, x19, [sp, #80] // 16-byte Folded Spill
+ ; CHECK64-NEXT: sub sp, sp, #64
+ ; CHECK64-NEXT: addvl sp, sp, #-1
+ ; CHECK64-NEXT: mov x19, sp
+ ; CHECK64-NEXT: .cfi_def_cfa w29, 32
+ ; CHECK64-NEXT: .cfi_offset w19, -8
+ ; CHECK64-NEXT: .cfi_offset w28, -16
+ ; CHECK64-NEXT: .cfi_offset w30, -24
+ ; CHECK64-NEXT: .cfi_offset w29, -32
+ ; CHECK64-NEXT: lsl x9, x0, #2
+ ; CHECK64-NEXT: mov x8, sp
+ ; CHECK64-NEXT: add x9, x9, #15
+ ; CHECK64-NEXT: and x9, x9, #0xfffffffffffffff0
+ ; CHECK64-NEXT: sub x0, x8, x9
+ ; CHECK64-NEXT: mov sp, x0
+ ; CHECK64-NEXT: mov z0.s, #0 // =0x0
+ ; CHECK64-NEXT: ptrue p0.s
+ ; CHECK64-NEXT: sub x8, x29, #64
+ ; CHECK64-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
+ ; CHECK64-NEXT: bl bar
+ ; CHECK64-NEXT: mov w0, wzr
+ ; CHECK64-NEXT: sub sp, x29, #64
+ ; CHECK64-NEXT: ldp x28, x19, [sp, #80] // 16-byte Folded Reload
+ ; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+ ; CHECK64-NEXT: add sp, sp, #96
+ ; CHECK64-NEXT: ret
+ ;
+ ; CHECK1024-LABEL: sve_stack_object_and_vla:
+ ; CHECK1024: // %bb.0: // %entry
+ ; CHECK1024-NEXT: sub sp, sp, #1056
+ ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+ ; CHECK1024-NEXT: add x29, sp, #1024
+ ; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+ ; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
+ ; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill
+ ; CHECK1024-NEXT: sub sp, sp, #1024
+ ; CHECK1024-NEXT: addvl sp, sp, #-1
+ ; CHECK1024-NEXT: mov x19, sp
+ ; CHECK1024-NEXT: .cfi_def_cfa w29, 32
+ ; CHECK1024-NEXT: .cfi_offset w19, -8
+ ; CHECK1024-NEXT: .cfi_offset w28, -16
+ ; CHECK1024-NEXT: .cfi_offset w30, -24
+ ; CHECK1024-NEXT: .cfi_offset w29, -32
+ ; CHECK1024-NEXT: lsl x9, x0, #2
+ ; CHECK1024-NEXT: mov x8, sp
+ ; CHECK1024-NEXT: add x9, x9, #15
+ ; CHECK1024-NEXT: and x9, x9, #0xfffffffffffffff0
+ ; CHECK1024-NEXT: sub x0, x8, x9
+ ; CHECK1024-NEXT: mov sp, x0
+ ; CHECK1024-NEXT: mov z0.s, #0 // =0x0
+ ; CHECK1024-NEXT: ptrue p0.s
+ ; CHECK1024-NEXT: sub x8, x29, #1024
+ ; CHECK1024-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
+ ; CHECK1024-NEXT: bl bar
+ ; CHECK1024-NEXT: mov w0, wzr
+ ; CHECK1024-NEXT: sub sp, x29, #1024
+ ; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload
+ ; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
+ ; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+ ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+ ; CHECK1024-NEXT: add sp, sp, #1056
+ ; CHECK1024-NEXT: ret
+ entry:
+ %a = alloca <vscale x 4 x i32>
+ %b = alloca i32, i64 %sz, align 4
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ call void @bar(ptr noundef nonnull %b)
+ ret i32 0
+ }