Skip to content

Commit ae952c5

Browse files
Dave Watson authored and herbertx (Herbert Xu) committed
crypto: aesni - Introduce partial block macro
Before this diff, multiple calls to GCM_ENC_DEC will succeed, but only if all calls are a multiple of 16 bytes. Handle partial blocks at the start of GCM_ENC_DEC, and update aadhash as appropriate. The data offset %r11 is also updated after the partial block. Signed-off-by: Dave Watson <[email protected]> Signed-off-by: Herbert Xu <[email protected]>
1 parent 1476db2 commit ae952c5

File tree

1 file changed

+150
-1
lines changed

1 file changed

+150
-1
lines changed

arch/x86/crypto/aesni-intel_asm.S

Lines changed: 150 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,13 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
285285
movdqu AadHash(%arg2), %xmm8
286286
movdqu HashKey(%arg2), %xmm13
287287
add %arg5, InLen(%arg2)
288+
289+
xor %r11, %r11 # initialise the data pointer offset as zero
290+
PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
291+
292+
sub %r11, %arg5 # sub partial block data used
288293
mov %arg5, %r13 # save the number of bytes
294+
289295
and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
290296
mov %r13, %r12
291297
# Encrypt/Decrypt first few blocks
@@ -606,6 +612,150 @@ _get_AAD_done\@:
606612
movdqu \TMP6, AadHash(%arg2)
607613
.endm
608614

615+
# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
616+
# between update calls.
617+
# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
618+
# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
619+
# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
620+
.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
621+
AAD_HASH operation
622+
mov PBlockLen(%arg2), %r13
623+
cmp $0, %r13
624+
je _partial_block_done_\@ # Leave Macro if no partial blocks
625+
# Read in input data without over reading
626+
cmp $16, \PLAIN_CYPH_LEN
627+
jl _fewer_than_16_bytes_\@
628+
movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm
629+
jmp _data_read_\@
630+
631+
_fewer_than_16_bytes_\@:
632+
lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
633+
mov \PLAIN_CYPH_LEN, %r12
634+
READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
635+
636+
mov PBlockLen(%arg2), %r13
637+
638+
_data_read_\@: # Finished reading in data
639+
640+
movdqu PBlockEncKey(%arg2), %xmm9
641+
movdqu HashKey(%arg2), %xmm13
642+
643+
lea SHIFT_MASK(%rip), %r12
644+
645+
# adjust the shuffle mask pointer to be able to shift r13 bytes
646+
# (16-r13 is the number of bytes in plaintext mod 16)
647+
add %r13, %r12
648+
movdqu (%r12), %xmm2 # get the appropriate shuffle mask
649+
PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes
650+
651+
.ifc \operation, dec
652+
movdqa %xmm1, %xmm3
653+
pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
654+
655+
mov \PLAIN_CYPH_LEN, %r10
656+
add %r13, %r10
657+
# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
658+
sub $16, %r10
659+
# Determine if partial block is not being filled and
660+
# shift mask accordingly
661+
jge _no_extra_mask_1_\@
662+
sub %r10, %r12
663+
_no_extra_mask_1_\@:
664+
665+
movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
666+
# get the appropriate mask to mask out bottom r13 bytes of xmm9
667+
pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9
668+
669+
pand %xmm1, %xmm3
670+
movdqa SHUF_MASK(%rip), %xmm10
671+
PSHUFB_XMM %xmm10, %xmm3
672+
PSHUFB_XMM %xmm2, %xmm3
673+
pxor %xmm3, \AAD_HASH
674+
675+
cmp $0, %r10
676+
jl _partial_incomplete_1_\@
677+
678+
# GHASH computation for the last <16 Byte block
679+
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
680+
xor %rax,%rax
681+
682+
mov %rax, PBlockLen(%arg2)
683+
jmp _dec_done_\@
684+
_partial_incomplete_1_\@:
685+
add \PLAIN_CYPH_LEN, PBlockLen(%arg2)
686+
_dec_done_\@:
687+
movdqu \AAD_HASH, AadHash(%arg2)
688+
.else
689+
pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn)
690+
691+
mov \PLAIN_CYPH_LEN, %r10
692+
add %r13, %r10
693+
# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
694+
sub $16, %r10
695+
# Determine if partial block is not being filled and
696+
# shift mask accordingly
697+
jge _no_extra_mask_2_\@
698+
sub %r10, %r12
699+
_no_extra_mask_2_\@:
700+
701+
movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
702+
# get the appropriate mask to mask out bottom r13 bytes of xmm9
703+
pand %xmm1, %xmm9
704+
705+
movdqa SHUF_MASK(%rip), %xmm1
706+
PSHUFB_XMM %xmm1, %xmm9
707+
PSHUFB_XMM %xmm2, %xmm9
708+
pxor %xmm9, \AAD_HASH
709+
710+
cmp $0, %r10
711+
jl _partial_incomplete_2_\@
712+
713+
# GHASH computation for the last <16 Byte block
714+
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
715+
xor %rax,%rax
716+
717+
mov %rax, PBlockLen(%arg2)
718+
jmp _encode_done_\@
719+
_partial_incomplete_2_\@:
720+
add \PLAIN_CYPH_LEN, PBlockLen(%arg2)
721+
_encode_done_\@:
722+
movdqu \AAD_HASH, AadHash(%arg2)
723+
724+
movdqa SHUF_MASK(%rip), %xmm10
725+
# shuffle xmm9 back to output as ciphertext
726+
PSHUFB_XMM %xmm10, %xmm9
727+
PSHUFB_XMM %xmm2, %xmm9
728+
.endif
729+
# output encrypted Bytes
730+
cmp $0, %r10
731+
jl _partial_fill_\@
732+
mov %r13, %r12
733+
mov $16, %r13
734+
# Set r13 to be the number of bytes to write out
735+
sub %r12, %r13
736+
jmp _count_set_\@
737+
_partial_fill_\@:
738+
mov \PLAIN_CYPH_LEN, %r13
739+
_count_set_\@:
740+
movdqa %xmm9, %xmm0
741+
MOVQ_R64_XMM %xmm0, %rax
742+
cmp $8, %r13
743+
jle _less_than_8_bytes_left_\@
744+
745+
mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
746+
add $8, \DATA_OFFSET
747+
psrldq $8, %xmm0
748+
MOVQ_R64_XMM %xmm0, %rax
749+
sub $8, %r13
750+
_less_than_8_bytes_left_\@:
751+
movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
752+
add $1, \DATA_OFFSET
753+
shr $8, %rax
754+
sub $1, %r13
755+
jne _less_than_8_bytes_left_\@
756+
_partial_block_done_\@:
757+
.endm # PARTIAL_BLOCK
758+
609759
/*
610760
* if a = number of total plaintext bytes
611761
* b = floor(a/16)
@@ -624,7 +774,6 @@ _get_AAD_done\@:
624774

625775
movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0
626776

627-
xor %r11, %r11 # initialise the data pointer offset as zero
628777
# start AES for num_initial_blocks blocks
629778

630779
movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0

0 commit comments

Comments
 (0)