@@ -112,6 +112,14 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 // (for Karatsuba purposes)
 #define	VARIABLE_OFFSET	16*8
 
+#define AadHash 16*0
+#define AadLen 16*1
+#define InLen (16*1)+8
+#define PBlockEncKey 16*2
+#define OrigIV 16*3
+#define CurCount 16*4
+#define PBlockLen 16*5
+
 #define arg1 rdi
 #define arg2 rsi
 #define arg3 rdx
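These offsets describe the per-request `gcm_context_data` state that the glue code now passes in. As a reading aid, here is a C sketch of the layout they imply (field names are illustrative assumptions; the authoritative definition lives in the glue code):

```c
#include <linux/types.h>

/* Illustrative sketch only: field names are assumed, but each field's
 * byte offset matches the corresponding #define above. */
struct gcm_context_data {
	u8	aad_hash[16];			/* AadHash      = 16*0     */
	u64	aad_length;			/* AadLen       = 16*1     */
	u64	in_length;			/* InLen        = (16*1)+8 */
	u8	partial_block_enc_key[16];	/* PBlockEncKey = 16*2     */
	u8	orig_iv[16];			/* OrigIV       = 16*3     */
	u8	current_counter[16];		/* CurCount     = 16*4     */
	u64	partial_block_len;		/* PBlockLen    = 16*5     */
};
```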
@@ -122,6 +130,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 #define arg8 STACK_OFFSET+16(%r14)
 #define arg9 STACK_OFFSET+24(%r14)
 #define arg10 STACK_OFFSET+32(%r14)
+#define arg11 STACK_OFFSET+40(%r14)
 #define keysize 2*15*16(%arg1)
 #endif
 
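The context pointer becomes the second parameter, so every later argument shifts up one slot; that is why an eleventh stack argument appears and why the arg references throughout the rest of the diff are renumbered. A hedged sketch of the resulting mapping for the encrypt entry point (the decrypt prototype differs only in the in/out roles):

```c
#include <linux/types.h>

struct gcm_context_data;	/* see the layout sketch above */

void aesni_gcm_enc(void *aes_ctx,			/* arg1:  %rdi           */
		   struct gcm_context_data *data,	/* arg2:  %rsi (new)     */
		   u8 *out,				/* arg3:  %rdx           */
		   const u8 *in,			/* arg4:  %rcx           */
		   u64 plaintext_len,			/* arg5:  %r8            */
		   u8 *iv,				/* arg6:  %r9            */
		   u8 *hash_subkey,			/* arg7:  stack via %r14 */
		   const u8 *aad,			/* arg8:  stack          */
		   unsigned long aad_len,		/* arg9:  stack          */
		   u8 *auth_tag,			/* arg10: stack          */
		   unsigned long auth_tag_len);		/* arg11: stack          */
```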
@@ -196,9 +205,9 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
 # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
 .macro GCM_INIT
-	mov	%arg6, %r12
+	mov	arg7, %r12
 	movdqu	(%r12), %xmm13
-	movdqa	  SHUF_MASK(%rip), %xmm2
+	movdqa	SHUF_MASK(%rip), %xmm2
 	PSHUFB_XMM %xmm2, %xmm13
 
 # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
@@ -218,7 +227,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 	pand	POLY(%rip), %xmm2
 	pxor	%xmm2, %xmm13
 	movdqa	%xmm13, HashKey(%rsp)
-	mov	%arg4, %r13		# %xmm13 holds HashKey<<1 (mod poly)
+	mov	%arg5, %r13		# %xmm13 holds HashKey<<1 (mod poly)
 	and	$-16, %r13
 	mov	%r13, %r12
 .endm
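The `and $-16, %r13` rounds the text length down to a whole number of 16-byte blocks; the remainder is left for the partial-block path at the end of GCM_ENC_DEC. The same computation in C, as a minimal sketch:

```c
#include <stdint.h>

/* Main loops consume only whole 16-byte blocks; the tail
 * (text_len & 15) is handled by the _zero_cipher_left_ path. */
static inline uint64_t gcm_full_block_bytes(uint64_t text_len)
{
	return text_len & ~(uint64_t)15;	/* and $-16, %r13 */
}
```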
@@ -272,18 +281,18 @@ _four_cipher_left_\@:
 	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
 %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
 _zero_cipher_left_\@:
-	mov	%arg4, %r13
-	and	$15, %r13			# %r13 = arg4 (mod 16)
+	mov	%arg5, %r13
+	and	$15, %r13			# %r13 = arg5 (mod 16)
 	je	_multiple_of_16_bytes_\@
 
 	# Handle the last <16 Byte block separately
 	paddd	ONE(%rip), %xmm0		# INCR CNT to get Yn
-	movdqa	  SHUF_MASK(%rip), %xmm10
+	movdqa	SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
 
 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1	# Encrypt(K, Yn)
 
-	lea (%arg3,%r11,1), %r10
+	lea (%arg4,%r11,1), %r10
 	mov %r13, %r12
 	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
 
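What this tail path does, sketched in C: one more counter block Yn is encrypted, and the resulting keystream is XORed into the final len mod 16 input bytes (READ_PARTIAL_BLOCK loads them without reading past the end of the buffer):

```c
#include <stdint.h>
#include <stddef.h>

/* Sketch only: "keystream" stands for E(K, Yn) after the byte swap. */
static void gcm_xor_partial_tail(uint8_t *out, const uint8_t *in,
				 const uint8_t keystream[16],
				 size_t tail_len)	/* tail_len = len & 15 */
{
	for (size_t i = 0; i < tail_len; i++)
		out[i] = in[i] ^ keystream[i];
}
```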
@@ -321,13 +330,13 @@ _zero_cipher_left_\@:
 	MOVQ_R64_XMM	%xmm0, %rax
 	cmp	$8, %r13
 	jle	_less_than_8_bytes_left_\@
-	mov	%rax, (%arg2, %r11, 1)
+	mov	%rax, (%arg3, %r11, 1)
 	add	$8, %r11
 	psrldq	$8, %xmm0
 	MOVQ_R64_XMM	%xmm0, %rax
 	sub	$8, %r13
 _less_than_8_bytes_left_\@:
-	mov	%al, (%arg2, %r11, 1)
+	mov	%al, (%arg3, %r11, 1)
 	add	$1, %r11
 	shr	$8, %rax
 	sub	$1, %r13
@@ -339,11 +348,11 @@ _multiple_of_16_bytes_\@:
 # Output: Authorization Tag (AUTH_TAG)
 # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
 .macro GCM_COMPLETE
-	mov	arg8, %r12		# %r13 = aadLen (number of bytes)
+	mov	arg9, %r12		# %r12 = aadLen (number of bytes)
 	shl	$3, %r12		# convert into number of bits
 	movd	%r12d, %xmm15		# len(A) in %xmm15
-	shl	$3, %arg4		# len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
+	shl	$3, %arg5		# len(C) in bits (*128)
+	MOVQ_R64_XMM	%arg5, %xmm1
 	pslldq	$8, %xmm15		# %xmm15 = len(A)||0x0000000000000000
 	pxor	%xmm1, %xmm15		# %xmm15 = len(A)||len(C)
 	pxor	%xmm15, %xmm8
@@ -352,13 +361,13 @@ _multiple_of_16_bytes_\@:
 	movdqa	SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm8
 
-	mov	%arg5, %rax		# %rax = *Y0
+	mov	%arg6, %rax		# %rax = *Y0
 	movdqu	(%rax), %xmm0		# %xmm0 = Y0
 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1	# E(K, Y0)
 	pxor	%xmm8, %xmm0
 _return_T_\@:
-	mov	arg9, %r10		# %r10 = authTag
-	mov	arg10, %r11		# %r11 = auth_tag_len
+	mov	arg10, %r10		# %r10 = authTag
+	mov	arg11, %r11		# %r11 = auth_tag_len
 	cmp	$16, %r11
 	je	_T_16_\@
 	cmp	$8, %r11
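For orientation: the tag assembled here is T = E(K, Y0) XOR GHASH(H, A, C), where the last block fed to GHASH is len(A)||len(C) with both lengths in bits, and T is then truncated to auth_tag_len. A sketch of that length-block construction in C (big-endian packing, matching the state after the PSHUFB byte swap):

```c
#include <stdint.h>

/* Build the final GHASH block len(A) || len(C), both in bits (sketch). */
static void gcm_len_block(uint8_t block[16], uint64_t aad_len_bytes,
			  uint64_t text_len_bytes)
{
	uint64_t abits = aad_len_bytes * 8;	/* shl $3, %r12  */
	uint64_t cbits = text_len_bytes * 8;	/* shl $3, %arg5 */

	for (int i = 0; i < 8; i++) {
		block[7 - i]  = (uint8_t)(abits >> (8 * i));
		block[15 - i] = (uint8_t)(cbits >> (8 * i));
	}
}
```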
@@ -496,15 +505,15 @@ _done_read_partial_block_\@:
 * the ciphertext
 * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
 * are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+* arg1, %arg3, %arg4, %r14 are used as a pointer only, not modified
 */
 
 
 .macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
 	MOVADQ	SHUF_MASK(%rip), %xmm14
-	mov	arg7, %r10		# %r10 = AAD
-	mov	arg8, %r11		# %r11 = aadLen
+	mov	arg8, %r10		# %r10 = AAD
+	mov	arg9, %r11		# %r11 = aadLen
 	pxor	%xmm\i, %xmm\i
 	pxor	\XMM2, \XMM2
 
@@ -536,7 +545,7 @@ _get_AAD_done\@:
 	xor	%r11, %r11	# initialise the data pointer offset as zero
 	# start AES for num_initial_blocks blocks
 
-	mov	%arg5, %rax	# %rax = *Y0
+	mov	%arg6, %rax	# %rax = *Y0
 	movdqu	(%rax), \XMM0	# XMM0 = Y0
 	PSHUFB_XMM %xmm14, \XMM0
 
@@ -573,9 +582,9 @@ aes_loop_initial_\@:
 	AESENCLAST \TMP1, %xmm\index		# Last Round
 .endr
 .irpc index, \i_seq
-	movdqu	(%arg3, %r11, 1), \TMP1
+	movdqu	(%arg4, %r11, 1), \TMP1
 	pxor	\TMP1, %xmm\index
-	movdqu	%xmm\index, (%arg2, %r11, 1)
+	movdqu	%xmm\index, (%arg3, %r11, 1)
 	# write back plaintext/ciphertext for num_initial_blocks
 	add	$16, %r11
 
@@ -694,34 +703,34 @@ aes_loop_pre_done\@:
 	AESENCLAST \TMP2, \XMM2
 	AESENCLAST \TMP2, \XMM3
 	AESENCLAST \TMP2, \XMM4
-	movdqu	16*0(%arg3, %r11, 1), \TMP1
+	movdqu	16*0(%arg4, %r11, 1), \TMP1
 	pxor	\TMP1, \XMM1
 .ifc \operation, dec
-	movdqu	\XMM1, 16*0(%arg2, %r11, 1)
+	movdqu	\XMM1, 16*0(%arg3, %r11, 1)
 	movdqa	\TMP1, \XMM1
 .endif
-	movdqu	16*1(%arg3, %r11, 1), \TMP1
+	movdqu	16*1(%arg4, %r11, 1), \TMP1
 	pxor	\TMP1, \XMM2
 .ifc \operation, dec
-	movdqu	\XMM2, 16*1(%arg2, %r11, 1)
+	movdqu	\XMM2, 16*1(%arg3, %r11, 1)
 	movdqa	\TMP1, \XMM2
 .endif
-	movdqu	16*2(%arg3, %r11, 1), \TMP1
+	movdqu	16*2(%arg4, %r11, 1), \TMP1
 	pxor	\TMP1, \XMM3
 .ifc \operation, dec
-	movdqu	\XMM3, 16*2(%arg2, %r11, 1)
+	movdqu	\XMM3, 16*2(%arg3, %r11, 1)
 	movdqa	\TMP1, \XMM3
 .endif
-	movdqu	16*3(%arg3, %r11, 1), \TMP1
+	movdqu	16*3(%arg4, %r11, 1), \TMP1
 	pxor	\TMP1, \XMM4
 .ifc \operation, dec
-	movdqu	\XMM4, 16*3(%arg2, %r11, 1)
+	movdqu	\XMM4, 16*3(%arg3, %r11, 1)
 	movdqa	\TMP1, \XMM4
 .else
-	movdqu	\XMM1, 16*0(%arg2, %r11, 1)
-	movdqu	\XMM2, 16*1(%arg2, %r11, 1)
-	movdqu	\XMM3, 16*2(%arg2, %r11, 1)
-	movdqu	\XMM4, 16*3(%arg2, %r11, 1)
+	movdqu	\XMM1, 16*0(%arg3, %r11, 1)
+	movdqu	\XMM2, 16*1(%arg3, %r11, 1)
+	movdqu	\XMM3, 16*2(%arg3, %r11, 1)
+	movdqu	\XMM4, 16*3(%arg3, %r11, 1)
 .endif
 
 	add	$64, %r11
@@ -739,7 +748,7 @@ _initial_blocks_done\@:
 /*
 * encrypt 4 blocks at a time
 * ghash the 4 previously encrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
 .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -883,18 +892,18 @@ aes_loop_par_enc_done:
 	AESENCLAST \TMP3, \XMM4
 	movdqa	  HashKey_k(%rsp), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2		# TMP2 = (a1+a0)*(b1+b0)
-	movdqu	(%arg3,%r11,1), \TMP3
+	movdqu	(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM1			# Ciphertext/Plaintext XOR EK
-	movdqu	16(%arg3,%r11,1), \TMP3
+	movdqu	16(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM2			# Ciphertext/Plaintext XOR EK
-	movdqu	32(%arg3,%r11,1), \TMP3
+	movdqu	32(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM3			# Ciphertext/Plaintext XOR EK
-	movdqu	48(%arg3,%r11,1), \TMP3
+	movdqu	48(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM4			# Ciphertext/Plaintext XOR EK
-	movdqu	\XMM1, (%arg2,%r11,1)		# Write to the ciphertext buffer
-	movdqu	\XMM2, 16(%arg2,%r11,1)		# Write to the ciphertext buffer
-	movdqu	\XMM3, 32(%arg2,%r11,1)		# Write to the ciphertext buffer
-	movdqu	\XMM4, 48(%arg2,%r11,1)		# Write to the ciphertext buffer
+	movdqu	\XMM1, (%arg3,%r11,1)		# Write to the ciphertext buffer
+	movdqu	\XMM2, 16(%arg3,%r11,1)		# Write to the ciphertext buffer
+	movdqu	\XMM3, 32(%arg3,%r11,1)		# Write to the ciphertext buffer
+	movdqu	\XMM4, 48(%arg3,%r11,1)		# Write to the ciphertext buffer
 	PSHUFB_XMM %xmm15, \XMM1		# perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM2		# perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM3		# perform a 16 byte swap
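The load/XOR/store run above is plain 4-way CTR: four AES-encrypted counter blocks held in XMM1..XMM4 are XORed with four input blocks and written out, while the GHASH work on the previous four blocks is interleaved between the AES rounds. The data movement alone, sketched in C:

```c
#include <stdint.h>
#include <stddef.h>

/* Sketch only: ks[] stands for the four encrypted counter blocks held
 * in XMM1..XMM4; off mirrors %r11. in/out may alias (in-place mode). */
static void gcm_ctr_xor4(uint8_t *out, const uint8_t *in,
			 const uint8_t ks[4][16], size_t off)
{
	for (int j = 0; j < 4; j++)
		for (int b = 0; b < 16; b++)
			out[off + 16*j + b] = in[off + 16*j + b] ^ ks[j][b];
}
```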
@@ -947,7 +956,7 @@ aes_loop_par_enc_done:
 /*
 * decrypt 4 blocks at a time
 * ghash the 4 previously decrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
 .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -1091,21 +1100,21 @@ aes_loop_par_dec_done:
 	AESENCLAST \TMP3, \XMM4
 	movdqa	  HashKey_k(%rsp), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2		# TMP2 = (a1+a0)*(b1+b0)
-	movdqu	(%arg3,%r11,1), \TMP3
+	movdqu	(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM1			# Ciphertext/Plaintext XOR EK
-	movdqu	\XMM1, (%arg2,%r11,1)		# Write to plaintext buffer
+	movdqu	\XMM1, (%arg3,%r11,1)		# Write to plaintext buffer
 	movdqa	\TMP3, \XMM1
-	movdqu	16(%arg3,%r11,1), \TMP3
+	movdqu	16(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM2			# Ciphertext/Plaintext XOR EK
-	movdqu	\XMM2, 16(%arg2,%r11,1)		# Write to plaintext buffer
+	movdqu	\XMM2, 16(%arg3,%r11,1)		# Write to plaintext buffer
 	movdqa	\TMP3, \XMM2
-	movdqu	32(%arg3,%r11,1), \TMP3
+	movdqu	32(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM3			# Ciphertext/Plaintext XOR EK
-	movdqu	\XMM3, 32(%arg2,%r11,1)		# Write to plaintext buffer
+	movdqu	\XMM3, 32(%arg3,%r11,1)		# Write to plaintext buffer
 	movdqa	\TMP3, \XMM3
-	movdqu	48(%arg3,%r11,1), \TMP3
+	movdqu	48(%arg4,%r11,1), \TMP3
 	pxor	\TMP3, \XMM4			# Ciphertext/Plaintext XOR EK
-	movdqu	\XMM4, 48(%arg2,%r11,1)		# Write to plaintext buffer
+	movdqu	\XMM4, 48(%arg3,%r11,1)		# Write to plaintext buffer
 	movdqa	\TMP3, \XMM4
 	PSHUFB_XMM %xmm15, \XMM1		# perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM2		# perform a 16 byte swap
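Note the asymmetry with the encrypt variant: each ciphertext block is saved in \TMP3 before the XOR, because for decryption GHASH must consume the ciphertext input, not the plaintext just written. One block of that pattern, sketched in C:

```c
#include <stdint.h>
#include <string.h>

/* Sketch only: keystream stands for the encrypted counter block. */
static void gcm_dec_block(uint8_t *plain_out, const uint8_t *ct_in,
			  const uint8_t keystream[16],
			  uint8_t ghash_in[16])
{
	memcpy(ghash_in, ct_in, 16);	/* movdqa \TMP3, \XMMn: keep CT for GHASH */
	for (int b = 0; b < 16; b++)
		plain_out[b] = ct_in[b] ^ keystream[b];
}
```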
@@ -1278,6 +1287,8 @@ _esb_loop_\@:
 .endm
 /*****************************************************************************
 * void aesni_gcm_dec(void *aes_ctx,	// AES Key schedule. Starts on a 16 byte boundary.
+*			struct gcm_context_data *data
+*					// Context data
 * u8 *out,		// Plaintext output. Encrypt in-place is allowed.
 * const u8 *in,	// Ciphertext input
 * u64 plaintext_len,	// Length of data in bytes for decryption.
@@ -1367,6 +1378,8 @@ ENDPROC(aesni_gcm_dec)
 
 /*****************************************************************************
 * void aesni_gcm_enc(void *aes_ctx,	// AES Key schedule. Starts on a 16 byte boundary.
+*			struct gcm_context_data *data
+*					// Context data
 * u8 *out,		// Ciphertext output. Encrypt in-place is allowed.
 * const u8 *in,	// Plaintext input
 * u64 plaintext_len,	// Length of data in bytes for encryption.