
Commit 7c50136

Ard Biesheuvel authored and herbertx (Herbert Xu) committed

crypto: arm64/aes-ghash - yield NEON after every block of input

Avoid excessive scheduling delays under a preemptible kernel by yielding
the NEON after every block of input.

Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent 20ab633 commit 7c50136
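
On the C side, the practical effect is that kernel_neon_begin()/kernel_neon_end() now bracket each scatterlist chunk instead of the whole AEAD walk, so the scheduler gets a chance to run between chunks. A condensed sketch of the encrypt-path loop as it reads after this patch (error paths and the partial-block tail trimmed, not a drop-in function):

	err = skcipher_walk_aead_encrypt(&walk, req, false);	/* may sleep now */

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int blocks = walk.nbytes / AES_BLOCK_SIZE;

		kernel_neon_begin();		/* claim the NEON per chunk */
		pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
				  walk.src.virt.addr, &ctx->ghash_key,
				  iv, ctx->aes_key.key_enc,
				  num_rounds(&ctx->aes_key), ks);
		kernel_neon_end();		/* release it before walking on */

		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}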

File tree: 2 files changed, +97 −44 lines

  arch/arm64/crypto/ghash-ce-core.S
  arch/arm64/crypto/ghash-ce-glue.c

arch/arm64/crypto/ghash-ce-core.S

Lines changed: 80 additions & 33 deletions
@@ -213,22 +213,31 @@
 	.endm

 	.macro		__pmull_ghash, pn
-	ld1		{SHASH.2d}, [x3]
-	ld1		{XL.2d}, [x1]
+	frame_push	5
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+
+0:	ld1		{SHASH.2d}, [x22]
+	ld1		{XL.2d}, [x20]
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

 	__pmull_pre_\pn

 	/* do the head block first, if supplied */
-	cbz		x4, 0f
-	ld1		{T1.2d}, [x4]
-	b		1f
+	cbz		x23, 1f
+	ld1		{T1.2d}, [x23]
+	mov		x23, xzr
+	b		2f

-0:	ld1		{T1.2d}, [x2], #16
-	sub		w0, w0, #1
+1:	ld1		{T1.2d}, [x21], #16
+	sub		w19, w19, #1

-1:	/* multiply XL by SHASH in GF(2^128) */
+2:	/* multiply XL by SHASH in GF(2^128) */
 CPU_LE(	rev64		T1.16b, T1.16b	)

 	ext		T2.16b, XL.16b, XL.16b, #8
@@ -250,9 +259,18 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b

-	cbnz		w0, 0b
+	cbz		w19, 3f
+
+	if_will_cond_yield_neon
+	st1		{XL.2d}, [x20]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b

-	st1		{XL.2d}, [x1]
+3:	st1		{XL.2d}, [x20]
+	frame_pop
 	ret
 	.endm

@@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8)
 	.endm

 	.macro		pmull_gcm_do_crypt, enc
-	ld1		{SHASH.2d}, [x4]
-	ld1		{XL.2d}, [x1]
-	ldr		x8, [x5, #8]			// load lower counter
+	frame_push	10
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+	mov		x25, x6
+	mov		x26, x7
+	.if		\enc == 1
+	ldr		x27, [sp, #96]			// first stacked arg
+	.endif
+
+	ldr		x28, [x24, #8]			// load lower counter
+CPU_LE(	rev		x28, x28	)
+
+0:	mov		x0, x25
+	load_round_keys	w26, x0
+	ld1		{SHASH.2d}, [x23]
+	ld1		{XL.2d}, [x20]

 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-CPU_LE(	rev		x8, x8		)
 	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

 	.if		\enc == 1
-	ld1		{KS.16b}, [x7]
+	ld1		{KS.16b}, [x27]
 	.endif

-0:	ld1		{CTR.8b}, [x5]			// load upper counter
-	ld1		{INP.16b}, [x3], #16
-	rev		x9, x8
-	add		x8, x8, #1
-	sub		w0, w0, #1
+1:	ld1		{CTR.8b}, [x24]			// load upper counter
+	ld1		{INP.16b}, [x22], #16
+	rev		x9, x28
+	add		x28, x28, #1
+	sub		w19, w19, #1
 	ins		CTR.d[1], x9			// set lower counter

 	.if		\enc == 1
 	eor		INP.16b, INP.16b, KS.16b	// encrypt input
-	st1		{INP.16b}, [x2], #16
+	st1		{INP.16b}, [x21], #16
 	.endif

 	rev64		T1.16b, INP.16b

-	cmp		w6, #12
-	b.ge		2f				// AES-192/256?
+	cmp		w26, #12
+	b.ge		4f				// AES-192/256?

-1:	enc_round	CTR, v21
+2:	enc_round	CTR, v21

 	ext		T2.16b, XL.16b, XL.16b, #8
 	ext		IN1.16b, T1.16b, T1.16b, #8
@@ -390,27 +425,39 @@ CPU_LE(	rev		x8, x8		)

 	.if		\enc == 0
 	eor		INP.16b, INP.16b, KS.16b
-	st1		{INP.16b}, [x2], #16
+	st1		{INP.16b}, [x21], #16
 	.endif

-	cbnz		w0, 0b
+	cbz		w19, 3f

-CPU_LE(	rev		x8, x8		)
-	st1		{XL.2d}, [x1]
-	str		x8, [x5, #8]			// store lower counter
+	if_will_cond_yield_neon
+	st1		{XL.2d}, [x20]
+	.if		\enc == 1
+	st1		{KS.16b}, [x27]
+	.endif
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon

+	b		1b
+
+3:	st1		{XL.2d}, [x20]
 	.if		\enc == 1
-	st1		{KS.16b}, [x7]
+	st1		{KS.16b}, [x27]
 	.endif

+CPU_LE(	rev		x28, x28	)
+	str		x28, [x24, #8]			// store lower counter
+
+	frame_pop
 	ret

-2:	b.eq		3f				// AES-192?
+4:	b.eq		5f				// AES-192?
 	enc_round	CTR, v17
 	enc_round	CTR, v18
-3:	enc_round	CTR, v19
+5:	enc_round	CTR, v19
 	enc_round	CTR, v20
-	b		1b
+	b		2b
 	.endm

 	/*
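
In the assembly above, the incoming arguments are first parked in callee-saved registers (x19–x28) behind frame_push so they survive a voluntary reschedule, and label 0: reloads the round keys and the SHASH/XL state afterwards. The yield block after each GHASH/GCM block is roughly equivalent to the following C, assuming the if_will_cond_yield_neon/do_cond_yield_neon/endif_yield_neon helpers amount to a need_resched() check wrapped around kernel_neon_end()/cond_resched()/kernel_neon_begin() (a sketch of the intent, not the actual macro expansion):

	/* Sketch only: approximate behaviour of the cond-yield sequence above. */
	if (blocks_remaining && need_resched()) {
		save_partial_state();	/* hypothetical helper: st1 {XL.2d}, [x20]
					 * (plus KS on the GCM encrypt path)      */
		kernel_neon_end();	/* do_cond_yield_neon: give up the NEON,  */
		cond_resched();		/* let the scheduler run something else,  */
		kernel_neon_begin();	/* then take the NEON back,               */
		goto reload;		/* b 0b: reload keys/state and continue   */
	}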

arch/arm64/crypto/ghash-ce-glue.c

Lines changed: 17 additions & 11 deletions
@@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,

 asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], int rounds, u8 ks[]);
+				  u8 ctr[], u32 const rk[], int rounds,
+				  u8 ks[]);

 asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], int rounds);
+				  u8 ctr[], u32 const rk[], int rounds);

 asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
 					u32 const rk[], int rounds);
@@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req)
 		pmull_gcm_encrypt_block(ks, iv, NULL,
 					num_rounds(&ctx->aes_key));
 		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+		kernel_neon_end();

-		err = skcipher_walk_aead_encrypt(&walk, req, true);
+		err = skcipher_walk_aead_encrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;

+			kernel_neon_begin();
 			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, num_rounds(&ctx->aes_key), ks);
+					  iv, ctx->aes_key.key_enc,
+					  num_rounds(&ctx->aes_key), ks);
+			kernel_neon_end();

 			err = skcipher_walk_done(&walk,
 					walk.nbytes % AES_BLOCK_SIZE);
 		}
-		kernel_neon_end();
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
 				    num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);

-		err = skcipher_walk_aead_encrypt(&walk, req, true);
+		err = skcipher_walk_aead_encrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -467,30 +471,32 @@ static int gcm_decrypt(struct aead_request *req)
 		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
 					num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		kernel_neon_end();

-		err = skcipher_walk_aead_decrypt(&walk, req, true);
+		err = skcipher_walk_aead_decrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;

+			kernel_neon_begin();
 			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, num_rounds(&ctx->aes_key));
+					  iv, ctx->aes_key.key_enc,
+					  num_rounds(&ctx->aes_key));
+			kernel_neon_end();

 			err = skcipher_walk_done(&walk,
 					walk.nbytes % AES_BLOCK_SIZE);
 		}
 		if (walk.nbytes)
 			pmull_gcm_encrypt_block(iv, iv, NULL,
 					num_rounds(&ctx->aes_key));
-
-		kernel_neon_end();
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
 				    num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);

-		err = skcipher_walk_aead_decrypt(&walk, req, true);
+		err = skcipher_walk_aead_decrypt(&walk, req, false);

 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
