@@ -256,6 +256,37 @@ aad_shift_arr:
pxor \TMP1, \GH # result is in TMP1
.endm

+ # Reads DLEN bytes starting at DPTR and stores in XMMDst
+ # where 0 < DLEN < 16
+ # Clobbers %rax, DLEN and XMM1
+ .macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
+ cmp $8, \DLEN
+ jl _read_lt8_\@
+ mov (\DPTR), %rax
+ MOVQ_R64_XMM %rax, \XMMDst
+ sub $8, \DLEN
+ jz _done_read_partial_block_\@
+ xor %eax, %eax
+ _read_next_byte_\@:
+ shl $8, %rax
+ mov 7(\DPTR, \DLEN, 1), %al
+ dec \DLEN
+ jnz _read_next_byte_\@
+ MOVQ_R64_XMM %rax, \XMM1
+ pslldq $8, \XMM1
+ por \XMM1, \XMMDst
+ jmp _done_read_partial_block_\@
+ _read_lt8_\@:
+ xor %eax, %eax
+ _read_next_byte_lt8_\@:
+ shl $8, %rax
+ mov -1(\DPTR, \DLEN, 1), %al
+ dec \DLEN
+ jnz _read_next_byte_lt8_\@
+ MOVQ_R64_XMM %rax, \XMMDst
+ _done_read_partial_block_\@:
+ .endm
+
/*
* if a = number of total plaintext bytes
* b = floor(a/16)
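For readers who do not parse GAS macros every day, here is a minimal C sketch of the value READ_PARTIAL_BLOCK builds (the names read_partial_block and block128 are illustrative, not kernel code): the macro dereferences only DPTR[0] through DPTR[DLEN-1], reading the tail bytes one at a time from the highest offset down, and the unused upper 16-DLEN bytes of XMMDst end up zero.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Illustrative C model of READ_PARTIAL_BLOCK; assumes 1 <= len <= 15
 * and a little-endian host, matching the x86 semantics it mirrors. */
struct block128 {
	uint64_t lo;   /* bytes 0..7 of the partial block, little endian */
	uint64_t hi;   /* bytes 8..15 of the partial block, little endian */
};

static struct block128 read_partial_block(const uint8_t *src, size_t len)
{
	struct block128 out = { 0, 0 };  /* unused upper bytes stay zero */
	uint64_t acc = 0;
	size_t i;

	if (len >= 8) {
		memcpy(&out.lo, src, 8);          /* mov (\DPTR), %rax */
		for (i = len; i > 8; i--)         /* shl $8 / mov 7(\DPTR,\DLEN,1) loop */
			acc = (acc << 8) | src[i - 1];
		out.hi = acc;                     /* pslldq $8 + por path */
	} else {
		for (i = len; i > 0; i--)         /* _read_next_byte_lt8 loop */
			acc = (acc << 8) | src[i - 1];
		out.lo = acc;
	}
	return out;
}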
@@ -1385,14 +1416,6 @@ _esb_loop_\@:
*
* AAD Format with 64-bit Extended Sequence Number
*
- * aadLen:
- * from the definition of the spec, aadLen can only be 8 or 12 bytes.
- * The code supports 16 too but for other sizes, the code will fail.
- *
- * TLen:
- * from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
- * For other sizes, the code will fail.
- *
* poly = x^128 + x^127 + x^126 + x^121 + 1
*
*****************************************************************************/
@@ -1486,19 +1509,16 @@ _zero_cipher_left_decrypt:
PSHUFB_XMM %xmm10, %xmm0

ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn)
- sub $16, %r11
- add %r13, %r11
- movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12
- # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
- # (%r13 is the number of bytes in plaintext mod 16)
- movdqu (%r12), %xmm2 # get the appropriate shuffle mask
- PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes

+ lea (%arg3,%r11,1), %r10
+ mov %r13, %r12
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+ lea ALL_F+16(%rip), %r12
+ sub %r13, %r12
movdqa %xmm1, %xmm2
pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn)
- movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ movdqu (%r12), %xmm1
# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0
pand %xmm1, %xmm2
@@ -1507,9 +1527,6 @@ _zero_cipher_left_decrypt:

pxor %xmm2, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
- sub %r13, %r11
- add $16, %r11

# output %r13 bytes
MOVQ_R64_XMM %xmm0, %rax
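The decrypt path above replaces the old SHIFT_MASK/PSHUFB right-shift with a direct partial read via READ_PARTIAL_BLOCK plus a mask lookup: %r12 is pointed at ALL_F + 16 - %r13, and the movdqu from there yields a byte mask for the %r13 valid bytes. A hedged C sketch of that lookup follows; it assumes ALL_F is 16 bytes of 0xff followed by 16 zero bytes (the layout the "mask out top 16-%r13 bytes" comment requires), and the C names are illustrative only.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/*
 * Illustrative model of the mask load done by:
 *     lea ALL_F+16(%rip), %r12
 *     sub %r13, %r12
 *     movdqu (%r12), %xmm1
 */
static const uint8_t all_f[32] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	/* remaining 16 entries are implicitly zero */
};

/* Produce a mask whose low `len` bytes are 0xff and top 16-len bytes are 0. */
static void partial_block_mask(uint8_t mask[16], size_t len /* 1..15 */)
{
	memcpy(mask, all_f + 16 - len, 16);   /* movdqu (ALL_F + 16 - len) */
}

The subsequent pand with this mask keeps only the %r13 valid bytes of the ciphertext and of the XOR result before they are folded into GHASH.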
@@ -1663,14 +1680,6 @@ ENDPROC(aesni_gcm_dec)
*
* AAD Format with 64-bit Extended Sequence Number
*
- * aadLen:
- * from the definition of the spec, aadLen can only be 8 or 12 bytes.
- * The code supports 16 too but for other sizes, the code will fail.
- *
- * TLen:
- * from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
- * For other sizes, the code will fail.
- *
* poly = x^128 + x^127 + x^126 + x^121 + 1
***************************************************************************/
ENTRY(aesni_gcm_enc)
@@ -1763,19 +1772,16 @@ _zero_cipher_left_encrypt:
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm0

-
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
- sub $16, %r11
- add %r13, %r11
- movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks
- lea SHIFT_MASK+16(%rip), %r12
+
+ lea (%arg3,%r11,1), %r10
+ mov %r13, %r12
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+ lea ALL_F+16(%rip), %r12
sub %r13, %r12
- # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
- # (%r13 is the number of bytes in plaintext mod 16)
- movdqu (%r12), %xmm2 # get the appropriate shuffle mask
- PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte
pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn)
- movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ movdqu (%r12), %xmm1
# get the appropriate mask to mask out top 16-r13 bytes of xmm0
pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
movdqa SHUF_MASK(%rip), %xmm10
@@ -1784,9 +1790,6 @@ _zero_cipher_left_encrypt:
pxor %xmm0, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# GHASH computation for the last <16 byte block
- sub %r13, %r11
- add $16, %r11
-
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm0