@@ -223,6 +223,118 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
mov %r13, %r12
.endm

+ # GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
+ # struct has been initialized by GCM_INIT.
+ # Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
+ # Clobbers rax, r10-r13, and xmm0-xmm15
+ .macro GCM_ENC_DEC operation
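+ # \operation is "enc" or "dec". %r13 and %r12 are expected to hold the
+ # byte count rounded down to a multiple of 16, as left by GCM_INIT.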
+ # Encrypt/Decrypt first few blocks
+
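+ # %r12 = 16 * (number of whole blocks mod 4): selects how many single
+ # blocks to process before entering the four-block main loop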
+ and $(3<<4), %r12
+ jz _initial_num_blocks_is_0_\@
+ cmp $(2<<4), %r12
+ jb _initial_num_blocks_is_1_\@
+ je _initial_num_blocks_is_2_\@
+ _initial_num_blocks_is_3_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
+ sub $48, %r13
+ jmp _initial_blocks_\@
+ _initial_num_blocks_is_2_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
+ sub $32, %r13
+ jmp _initial_blocks_\@
+ _initial_num_blocks_is_1_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
+ sub $16, %r13
+ jmp _initial_blocks_\@
+ _initial_num_blocks_is_0_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
+ _initial_blocks_\@:
+
+ # Main loop - Encrypt/Decrypt remaining blocks
+
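+ # %r13 = bytes remaining in whole blocks; each pass of _crypt_by_4
+ # below consumes 64 bytes (four blocks)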
+ cmp $0, %r13
+ je _zero_cipher_left_\@
+ sub $64, %r13
+ je _four_cipher_left_\@
+ _crypt_by_4_\@:
+ GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \
+ %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
+ %xmm7, %xmm8, enc
+ add $64, %r11
+ sub $64, %r13
+ jne _crypt_by_4_\@
+ _four_cipher_left_\@:
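+ # fold the last four ciphertext blocks into the GHASH state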
+ GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+ %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+ _zero_cipher_left_\@:
+ mov %arg4, %r13
+ and $15, %r13 # %r13 = arg4 (mod 16)
+ je _multiple_of_16_bytes_\@
+
+ # Handle the last <16 Byte block separately
+ paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm0
+
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
+
+ lea (%arg3,%r11,1), %r10
+ mov %r13, %r12
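+ # read the last %r12 (1..15) input bytes at %r10 into %xmm1,
+ # using %xmm2 as scratch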
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+ lea ALL_F+16(%rip), %r12
+ sub %r13, %r12
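+ # %r12 now points %r13 bytes before the zero block that follows ALL_F,
+ # so the movdqu below fetches a mask whose low %r13 bytes are 0xff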
+ .ifc \operation, dec
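+ # decrypt: save the ciphertext block, since GHASH must be computed
+ # over the ciphertext rather than the decrypted plaintext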
+ movdqa %xmm1, %xmm2
+ .endif
+ pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn)
+ movdqu (%r12), %xmm1
+ # get the appropriate mask to mask out top 16-r13 bytes of xmm0
+ pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
+ .ifc \operation, dec
+ pand %xmm1, %xmm2
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm2
+
+ pxor %xmm2, %xmm8
+ .else
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm0
+
+ pxor %xmm0, %xmm8
+ .endif
+
+ # GHASH computation for the last <16 byte block
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+ .ifc \operation, enc
+ # shuffle xmm0 back to output as ciphertext
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm0
+ .endif
+
+ # Output %r13 bytes
+ MOVQ_R64_XMM %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_\@
+ mov %rax, (%arg2, %r11, 1)
+ add $8, %r11
+ psrldq $8, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ sub $8, %r13
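+ # store the remaining bytes (at most 8) from %rax one at a time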
+ _less_than_8_bytes_left_\@:
+ mov %al, (%arg2, %r11, 1)
+ add $1, %r11
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_\@
+ _multiple_of_16_bytes_\@:
+ .endm
+
# GCM_COMPLETE Finishes update of tag of last partial block
# Output: Authorization Tag (AUTH_TAG)
# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
@@ -1246,93 +1358,7 @@ ENTRY(aesni_gcm_dec)
FUNC_SAVE

GCM_INIT
-
- # Decrypt first few blocks
-
- and $(3<<4), %r12
- jz _initial_num_blocks_is_0_decrypt
- cmp $(2<<4), %r12
- jb _initial_num_blocks_is_1_decrypt
- je _initial_num_blocks_is_2_decrypt
- _initial_num_blocks_is_3_decrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
- sub $48, %r13
- jmp _initial_blocks_decrypted
- _initial_num_blocks_is_2_decrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
- sub $32, %r13
- jmp _initial_blocks_decrypted
- _initial_num_blocks_is_1_decrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
- sub $16, %r13
- jmp _initial_blocks_decrypted
- _initial_num_blocks_is_0_decrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
- _initial_blocks_decrypted:
- cmp $0, %r13
- je _zero_cipher_left_decrypt
- sub $64, %r13
- je _four_cipher_left_decrypt
- _decrypt_by_4:
- GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
- %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
- add $64, %r11
- sub $64, %r13
- jne _decrypt_by_4
- _four_cipher_left_decrypt:
- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
- %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
- _zero_cipher_left_decrypt:
- mov %arg4, %r13
- and $15, %r13 # %r13 = arg4 (mod 16)
- je _multiple_of_16_bytes_decrypt
-
- # Handle the last <16 byte block separately
-
- paddd ONE(%rip), %xmm0 # increment CNT to get Yn
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn)
-
- lea (%arg3,%r11,1), %r10
- mov %r13, %r12
- READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
- lea ALL_F+16(%rip), %r12
- sub %r13, %r12
- movdqa %xmm1, %xmm2
- pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn)
- movdqu (%r12), %xmm1
- # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
- pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0
- pand %xmm1, %xmm2
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm2
-
- pxor %xmm2, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-
- # output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left_decrypt
- mov %rax, (%arg2, %r11, 1)
- add $8, %r11
- psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
- sub $8, %r13
- _less_than_8_bytes_left_decrypt:
- mov %al, (%arg2, %r11, 1)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left_decrypt
- _multiple_of_16_bytes_decrypt:
+ GCM_ENC_DEC dec
GCM_COMPLETE
FUNC_RESTORE
ret
@@ -1418,98 +1444,7 @@ ENTRY(aesni_gcm_enc)
FUNC_SAVE

GCM_INIT
- # Encrypt first few blocks
-
- and $(3<<4), %r12
- jz _initial_num_blocks_is_0_encrypt
- cmp $(2<<4), %r12
- jb _initial_num_blocks_is_1_encrypt
- je _initial_num_blocks_is_2_encrypt
- _initial_num_blocks_is_3_encrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
- sub $48, %r13
- jmp _initial_blocks_encrypted
- _initial_num_blocks_is_2_encrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
- sub $32, %r13
- jmp _initial_blocks_encrypted
- _initial_num_blocks_is_1_encrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
- sub $16, %r13
- jmp _initial_blocks_encrypted
- _initial_num_blocks_is_0_encrypt:
- INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
- %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
- _initial_blocks_encrypted:
-
- # Main loop - Encrypt remaining blocks
-
- cmp $0, %r13
- je _zero_cipher_left_encrypt
- sub $64, %r13
- je _four_cipher_left_encrypt
- _encrypt_by_4_encrypt:
- GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
- %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
- add $64, %r11
- sub $64, %r13
- jne _encrypt_by_4_encrypt
- _four_cipher_left_encrypt:
- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
- %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
- _zero_cipher_left_encrypt:
- mov %arg4, %r13
- and $15, %r13 # %r13 = arg4 (mod 16)
- je _multiple_of_16_bytes_encrypt
-
- # Handle the last <16 Byte block separately
- paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
-
- lea (%arg3,%r11,1), %r10
- mov %r13, %r12
- READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
- lea ALL_F+16(%rip), %r12
- sub %r13, %r12
- pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn)
- movdqu (%r12), %xmm1
- # get the appropriate mask to mask out top 16-r13 bytes of xmm0
- pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- pxor %xmm0, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- # shuffle xmm0 back to output as ciphertext
-
- # Output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left_encrypt
- mov %rax, (%arg2, %r11, 1)
- add $8, %r11
- psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
- sub $8, %r13
- _less_than_8_bytes_left_encrypt:
- mov %al, (%arg2, %r11, 1)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left_encrypt
- _multiple_of_16_bytes_encrypt:
- _return_T_encrypt:
+ GCM_ENC_DEC enc
GCM_COMPLETE
FUNC_RESTORE
ret