|
107 | 107 | */
|
108 | 108 | .text
|
109 | 109 | ENTRY(sha512_ce_transform)
|
| 110 | + frame_push 3 |
| 111 | + |
| 112 | + mov x19, x0 |
| 113 | + mov x20, x1 |
| 114 | + mov x21, x2 |
| 115 | + |
110 | 116 | /* load state */
|
111 |
| - ld1 {v8.2d-v11.2d}, [x0] |
| 117 | +0: ld1 {v8.2d-v11.2d}, [x19] |
112 | 118 |
|
113 | 119 | /* load first 4 round constants */
|
114 | 120 | adr_l x3, .Lsha512_rcon
|
115 | 121 | ld1 {v20.2d-v23.2d}, [x3], #64
|
116 | 122 |
|
117 | 123 | /* load input */
|
118 |
| -0: ld1 {v12.2d-v15.2d}, [x1], #64 |
119 |
| - ld1 {v16.2d-v19.2d}, [x1], #64 |
120 |
| - sub w2, w2, #1 |
| 124 | +1: ld1 {v12.2d-v15.2d}, [x20], #64 |
| 125 | + ld1 {v16.2d-v19.2d}, [x20], #64 |
| 126 | + sub w21, w21, #1 |
121 | 127 |
|
122 | 128 | CPU_LE( rev64 v12.16b, v12.16b )
|
123 | 129 | CPU_LE( rev64 v13.16b, v13.16b )
|
@@ -196,9 +202,18 @@ CPU_LE( rev64 v19.16b, v19.16b )
|
196 | 202 | add v11.2d, v11.2d, v3.2d
|
197 | 203 |
|
198 | 204 | /* handled all input blocks? */
|
199 |
| - cbnz w2, 0b |
| 205 | + cbz w21, 3f |
| 206 | + |
| 207 | + if_will_cond_yield_neon |
| 208 | + st1 {v8.2d-v11.2d}, [x19] |
| 209 | + do_cond_yield_neon |
| 210 | + b 0b |
| 211 | + endif_yield_neon |
| 212 | + |
| 213 | + b 1b |
200 | 214 |
|
201 | 215 | /* store new state */
|
202 |
| -3: st1 {v8.2d-v11.2d}, [x0] |
| 216 | +3: st1 {v8.2d-v11.2d}, [x19] |
| 217 | + frame_pop |
203 | 218 | ret
|
204 | 219 | ENDPROC(sha512_ce_transform)
|
0 commit comments