@@ -285,7 +285,13 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 	movdqu AadHash(%arg2), %xmm8
 	movdqu HashKey(%arg2), %xmm13
 	add %arg5, InLen(%arg2)
+
+	xor %r11, %r11 # initialise the data pointer offset as zero
+	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
+
+	sub %r11, %arg5		# sub partial block data used
 	mov %arg5, %r13		# save the number of bytes
+
 	and $-16, %r13		# %r13 = %r13 - (%r13 mod 16)
 	mov %r13, %r12
 	# Encrypt/Decrypt first few blocks
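
The flow added by this hunk is easier to follow in scalar form. Below is a minimal C sketch of the same bookkeeping; gcm_ctx, partial_block() and crypt_blocks() are illustrative stand-ins for the gcm_context_data fields, the PARTIAL_BLOCK macro and the existing bulk loop, not kernel APIs.

/* Sketch only: names below are stand-ins, not kernel interfaces. */
#include <stddef.h>
#include <stdint.h>

struct gcm_ctx {
	uint64_t in_len;      /* InLen(%arg2): total bytes processed so far   */
	uint64_t pblock_len;  /* PBlockLen(%arg2): bytes buffered, 0..15      */
	/* ... keys and hash state ... */
};

/* Stand-in for PARTIAL_BLOCK: drains input into the buffered partial
 * block and returns the number of input bytes consumed (the offset the
 * macro leaves in %r11 / DATA_OFFSET). */
size_t partial_block(struct gcm_ctx *ctx, uint8_t *out,
		     const uint8_t *in, size_t len);

/* Stand-in for the existing whole-block encrypt/decrypt loop. */
void crypt_blocks(struct gcm_ctx *ctx, uint8_t *out,
		  const uint8_t *in, size_t nbytes);

void gcm_update(struct gcm_ctx *ctx, uint8_t *out,
		const uint8_t *in, size_t len)
{
	size_t used, full;

	ctx->in_len += len;                      /* add %arg5, InLen(%arg2) */
	used = partial_block(ctx, out, in, len); /* %r11 starts at zero     */
	len -= used;                             /* sub %r11, %arg5         */
	full = len & ~(size_t)15;                /* and $-16, %r13          */
	crypt_blocks(ctx, out + used, in + used, full);
	/* the remaining len - full bytes become the next partial block */
}

The point of the design is that the data offset (%r11) lets the bulk loop start exactly where the partial block left off, so no byte is processed twice.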
@@ -606,6 +612,150 @@ _get_AAD_done\@:
 	movdqu \TMP6, AadHash(%arg2)
 .endm
 
+# PARTIAL_BLOCK: Handles the encryption/decryption and tagging of partial
+# blocks carried between update calls.
+# Requires the input data to be at least 1 byte long due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_context_data
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+	AAD_HASH operation
+	mov	PBlockLen(%arg2), %r13
+	cmp	$0, %r13
+	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+	# Read in input data without over reading
+	cmp	$16, \PLAIN_CYPH_LEN
+	jl	_fewer_than_16_bytes_\@
+	movups	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
+	jmp	_data_read_\@
+
+_fewer_than_16_bytes_\@:
+	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+	mov	\PLAIN_CYPH_LEN, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
+
+	mov	PBlockLen(%arg2), %r13
+
+_data_read_\@:				# Finished reading in data
+
+	movdqu	PBlockEncKey(%arg2), %xmm9
+	movdqu	HashKey(%arg2), %xmm13
+
+	lea	SHIFT_MASK(%rip), %r12
+
+	# adjust the shuffle mask pointer to be able to shift r13 bytes
+	# (16 - r13 is the number of bytes in plaintext mod 16)
+	add	%r13, %r12
+	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
+	PSHUFB_XMM %xmm2, %xmm9		# shift right r13 bytes
+
+.ifc \operation, dec
+	movdqa	%xmm1, %xmm3
+	pxor	%xmm1, %xmm9		# Cyphertext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in PLAIN_CYPH_IN after filling
+	sub	$16, %r10
+	# Determine if partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_1_\@
+	sub	%r10, %r12
+_no_extra_mask_1_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9
+
+	pand	%xmm1, %xmm3
+	movdqa	SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm3
+	PSHUFB_XMM %xmm2, %xmm3
+	pxor	%xmm3, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_1_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax, %rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_dec_done_\@
+_partial_incomplete_1_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_dec_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+.else
+	pxor	%xmm1, %xmm9		# Plaintext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in PLAIN_CYPH_IN after filling
+	sub	$16, %r10
+	# Determine if partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_2_\@
+	sub	%r10, %r12
+_no_extra_mask_2_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9
+
+	movdqa	SHUF_MASK(%rip), %xmm1
+	PSHUFB_XMM %xmm1, %xmm9
+	PSHUFB_XMM %xmm2, %xmm9
+	pxor	%xmm9, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_2_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax, %rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_encode_done_\@
+_partial_incomplete_2_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_encode_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+
+	movdqa	SHUF_MASK(%rip), %xmm10
+	# shuffle xmm9 back to output as ciphertext
+	PSHUFB_XMM %xmm10, %xmm9
+	PSHUFB_XMM %xmm2, %xmm9
+.endif
+	# output encrypted bytes
+	cmp	$0, %r10
+	jl	_partial_fill_\@
+	mov	%r13, %r12
+	mov	$16, %r13
+	# Set r13 to be the number of bytes to write out
+	sub	%r12, %r13
+	jmp	_count_set_\@
+_partial_fill_\@:
+	mov	\PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+	movdqa	%xmm9, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	cmp	$8, %r13
+	jle	_less_than_8_bytes_left_\@
+
+	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$8, \DATA_OFFSET
+	psrldq	$8, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	sub	$8, %r13
+_less_than_8_bytes_left_\@:
+	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$1, \DATA_OFFSET
+	shr	$8, %rax
+	sub	$1, %r13
+	jne	_less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
+
 /*
  * if a = number of total plaintext bytes
  * b = floor(a/16)
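
The macro's data flow is clearer when restated in scalar C. The sketch below is illustrative only: ghash_mul() is an assumed helper standing in for GHASH_MUL, the saved encrypted counter block E(K, Yn) is the PBlockEncKey field, and the pshufb/pand byte gymnastics (including the SHUF_MASK byte-order swap) are replaced by plain loops.

#include <stddef.h>
#include <stdint.h>

struct gcm_ctx {
	uint8_t  pblock_enc_key[16]; /* PBlockEncKey: saved E(K, Yn)    */
	uint8_t  aad_hash[16];       /* AadHash: running GHASH input    */
	uint64_t pblock_len;         /* PBlockLen: bytes buffered, 0..15 */
};

void ghash_mul(uint8_t hash[16], const struct gcm_ctx *ctx); /* assumed */

size_t partial_block(struct gcm_ctx *ctx, uint8_t *out,
		     const uint8_t *in, size_t len, int decrypt)
{
	size_t off = ctx->pblock_len;   /* %r13 on entry                 */
	size_t n = 16 - off;            /* room left in the block        */
	size_t i;

	if (off == 0)
		return 0;               /* _partial_block_done           */
	if (len < n)
		n = len;                /* read without over-reading     */

	for (i = 0; i < n; i++) {
		uint8_t ks = ctx->pblock_enc_key[off + i]; /* keystream  */
		uint8_t c  = decrypt ? in[i] : in[i] ^ ks; /* ciphertext */

		out[i] = in[i] ^ ks;
		/* fold the new ciphertext bytes into the hash input;
		 * the asm does this with the masked pxor into AAD_HASH */
		ctx->aad_hash[off + i] ^= c;
	}

	if (off + n == 16) {            /* block now complete            */
		ghash_mul(ctx->aad_hash, ctx);
		ctx->pblock_len = 0;    /* mov %rax, PBlockLen(%arg2)    */
	} else {
		ctx->pblock_len += n;   /* _partial_incomplete_*         */
	}
	return n;                       /* bytes consumed: DATA_OFFSET   */
}

Note the deferral: the expensive GHASH_MUL runs only when the buffered block finally fills; until then each call just XORs its ciphertext bytes into the pending hash input and bumps PBlockLen.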
@@ -624,7 +774,6 @@ _get_AAD_done\@:
 
 	movdqu AadHash(%arg2), %xmm\i		# XMM0 = Y0
 
-	xor %r11, %r11 # initialise the data pointer offset as zero
 	# start AES for num_initial_blocks blocks
 
 	movdqu CurCount(%arg2), \XMM0		# XMM0 = Y0
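
The variable shift-and-mask used throughout PARTIAL_BLOCK depends on the SHIFT_MASK and ALL_F tables (ALL_F is visible in the first hunk header above) sitting back to back: loading 16 bytes at SHIFT_MASK+n yields a pshufb control that shifts a register right by n bytes, and the adjacent 0xff/0x00 bytes of ALL_F double as a variable pand mask. A small self-contained demo, assuming SSSE3; the table contents mirror the .octa constants, everything else is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <tmmintrin.h>  /* _mm_shuffle_epi8 (SSSE3) */

static const uint8_t tables[48] = {
	/* SHIFT_MASK: identity shuffle 0x00..0x0f */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	/* ALL_F: 16 x 0xff, then 16 x 0x00 */
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

int main(void)
{
	unsigned n = 5;         /* shift/mask amount, i.e. %r13 */
	__m128i v = _mm_loadu_si128((const __m128i *)"ABCDEFGHIJKLMNOP");
	uint8_t out[16];

	/* movdqu SHIFT_MASK+n; PSHUFB: bytes n..15 move down, the rest
	 * become zero (controls >= 0x10 land in ALL_F's 0xff bytes, and
	 * pshufb zeroes any lane whose control has the high bit set) */
	__m128i shuf = _mm_loadu_si128((const __m128i *)(tables + n));
	__m128i shifted = _mm_shuffle_epi8(v, shuf);

	/* movdqu ALL_F-SHIFT_MASK+n; pand: keep the low 16-n bytes */
	__m128i mask = _mm_loadu_si128((const __m128i *)(tables + 16 + n));
	__m128i masked = _mm_and_si128(v, mask);

	_mm_storeu_si128((__m128i *)out, shifted);
	printf("shifted: %.11s\n", (const char *)out);  /* FGHIJKLMNOP */
	_mm_storeu_si128((__m128i *)out, masked);
	printf("masked:  %.11s\n", (const char *)out);  /* ABCDEFGHIJK */
	return 0;
}

Built with e.g. cc -mssse3 demo.c, this prints the input shifted right by five bytes and the input with its top five bytes cleared, the two primitives the macro composes to align the leftover keystream with a partial block.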