@@ -172,26 +172,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
172
172
;
173
173
; GFNI-LABEL: test_bitreverse_i64:
174
174
; GFNI: # %bb.0:
175
- ; GFNI-NEXT: bswapq %rdi
176
- ; GFNI-NEXT: movq %rdi, %rax
177
- ; GFNI-NEXT: shrq $4, %rax
178
- ; GFNI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
179
- ; GFNI-NEXT: andq %rcx, %rax
180
- ; GFNI-NEXT: andq %rcx, %rdi
181
- ; GFNI-NEXT: shlq $4, %rdi
182
- ; GFNI-NEXT: orq %rax, %rdi
183
- ; GFNI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
184
- ; GFNI-NEXT: movq %rdi, %rcx
185
- ; GFNI-NEXT: andq %rax, %rcx
186
- ; GFNI-NEXT: shrq $2, %rdi
187
- ; GFNI-NEXT: andq %rax, %rdi
188
- ; GFNI-NEXT: leaq (%rdi,%rcx,4), %rax
189
- ; GFNI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
190
- ; GFNI-NEXT: movq %rax, %rdx
191
- ; GFNI-NEXT: andq %rcx, %rdx
192
- ; GFNI-NEXT: shrq %rax
193
- ; GFNI-NEXT: andq %rcx, %rax
194
- ; GFNI-NEXT: leaq (%rax,%rdx,2), %rax
175
+ ; GFNI-NEXT: vmovq %rdi, %xmm0
176
+ ; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
177
+ ; GFNI-NEXT: vmovq %xmm0, %rax
178
+ ; GFNI-NEXT: bswapq %rax
195
179
; GFNI-NEXT: retq
196
180
%b = call i64 @llvm.bitreverse.i64 (i64 %a )
197
181
ret i64 %b
@@ -253,24 +237,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
253
237
;
254
238
; GFNI-LABEL: test_bitreverse_i32:
255
239
; GFNI: # %bb.0:
256
- ; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
257
- ; GFNI-NEXT: bswapl %edi
258
- ; GFNI-NEXT: movl %edi, %eax
259
- ; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
260
- ; GFNI-NEXT: shll $4, %eax
261
- ; GFNI-NEXT: shrl $4, %edi
262
- ; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
263
- ; GFNI-NEXT: orl %eax, %edi
264
- ; GFNI-NEXT: movl %edi, %eax
265
- ; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333
266
- ; GFNI-NEXT: shrl $2, %edi
267
- ; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333
268
- ; GFNI-NEXT: leal (%rdi,%rax,4), %eax
269
- ; GFNI-NEXT: movl %eax, %ecx
270
- ; GFNI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
271
- ; GFNI-NEXT: shrl %eax
272
- ; GFNI-NEXT: andl $1431655765, %eax # imm = 0x55555555
273
- ; GFNI-NEXT: leal (%rax,%rcx,2), %eax
240
+ ; GFNI-NEXT: vmovd %edi, %xmm0
241
+ ; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
242
+ ; GFNI-NEXT: vmovd %xmm0, %eax
243
+ ; GFNI-NEXT: bswapl %eax
274
244
; GFNI-NEXT: retq
275
245
%b = call i32 @llvm.bitreverse.i32 (i32 %a )
276
246
ret i32 %b
@@ -335,24 +305,10 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
335
305
;
336
306
; GFNI-LABEL: test_bitreverse_i24:
337
307
; GFNI: # %bb.0:
338
- ; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
339
- ; GFNI-NEXT: bswapl %edi
340
- ; GFNI-NEXT: movl %edi, %eax
341
- ; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
342
- ; GFNI-NEXT: shll $4, %eax
343
- ; GFNI-NEXT: shrl $4, %edi
344
- ; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
345
- ; GFNI-NEXT: orl %eax, %edi
346
- ; GFNI-NEXT: movl %edi, %eax
347
- ; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333
348
- ; GFNI-NEXT: shrl $2, %edi
349
- ; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333
350
- ; GFNI-NEXT: leal (%rdi,%rax,4), %eax
351
- ; GFNI-NEXT: movl %eax, %ecx
352
- ; GFNI-NEXT: andl $1431655680, %ecx # imm = 0x55555500
353
- ; GFNI-NEXT: shrl %eax
354
- ; GFNI-NEXT: andl $1431655680, %eax # imm = 0x55555500
355
- ; GFNI-NEXT: leal (%rax,%rcx,2), %eax
308
+ ; GFNI-NEXT: vmovd %edi, %xmm0
309
+ ; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
310
+ ; GFNI-NEXT: vmovd %xmm0, %eax
311
+ ; GFNI-NEXT: bswapl %eax
356
312
; GFNI-NEXT: shrl $8, %eax
357
313
; GFNI-NEXT: retq
358
314
%b = call i24 @llvm.bitreverse.i24 (i24 %a )
@@ -1412,196 +1368,67 @@ define i528 @large_promotion(i528 %A) nounwind {
1412
1368
;
1413
1369
; GFNI-LABEL: large_promotion:
1414
1370
; GFNI: # %bb.0:
1415
- ; GFNI-NEXT: pushq %r15
1416
1371
; GFNI-NEXT: pushq %r14
1417
- ; GFNI-NEXT: pushq %r13
1418
- ; GFNI-NEXT: pushq %r12
1419
1372
; GFNI-NEXT: pushq %rbx
1420
1373
; GFNI-NEXT: movq %rdi, %rax
1421
- ; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r12
1422
- ; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r15
1423
- ; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rbx
1424
- ; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
1374
+ ; GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9241421688590303745,9241421688590303745]
1375
+ ; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1376
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1377
+ ; GFNI-NEXT: vmovq %xmm1, %r10
1378
+ ; GFNI-NEXT: bswapq %r10
1379
+ ; GFNI-NEXT: vmovq %r9, %xmm1
1380
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1381
+ ; GFNI-NEXT: vmovq %xmm1, %rdi
1425
1382
; GFNI-NEXT: bswapq %rdi
1426
- ; GFNI-NEXT: movq %rdi, %r10
1427
- ; GFNI-NEXT: shrq $4, %r10
1428
- ; GFNI-NEXT: movabsq $1085102592571150095, %r11 # imm = 0xF0F0F0F0F0F0F0F
1429
- ; GFNI-NEXT: andq %r11, %r10
1430
- ; GFNI-NEXT: andq %r11, %rdi
1431
- ; GFNI-NEXT: shlq $4, %rdi
1432
- ; GFNI-NEXT: orq %r10, %rdi
1433
- ; GFNI-NEXT: movabsq $3689348814741910323, %r10 # imm = 0x3333333333333333
1434
- ; GFNI-NEXT: movq %rdi, %r14
1435
- ; GFNI-NEXT: andq %r10, %r14
1436
- ; GFNI-NEXT: shrq $2, %rdi
1437
- ; GFNI-NEXT: andq %r10, %rdi
1438
- ; GFNI-NEXT: leaq (%rdi,%r14,4), %rdi
1439
- ; GFNI-NEXT: movabsq $6148820866244280320, %r14 # imm = 0x5555000000000000
1440
- ; GFNI-NEXT: movq %rdi, %r13
1441
- ; GFNI-NEXT: andq %r14, %r13
1442
- ; GFNI-NEXT: shrq %rdi
1443
- ; GFNI-NEXT: andq %r14, %rdi
1444
- ; GFNI-NEXT: leaq (%rdi,%r13,2), %rdi
1445
- ; GFNI-NEXT: bswapq %rbx
1446
- ; GFNI-NEXT: movq %rbx, %r14
1447
- ; GFNI-NEXT: shrq $4, %r14
1448
- ; GFNI-NEXT: andq %r11, %r14
1449
- ; GFNI-NEXT: andq %r11, %rbx
1450
- ; GFNI-NEXT: shlq $4, %rbx
1451
- ; GFNI-NEXT: orq %r14, %rbx
1452
- ; GFNI-NEXT: movq %rbx, %r14
1453
- ; GFNI-NEXT: andq %r10, %r14
1454
- ; GFNI-NEXT: shrq $2, %rbx
1455
- ; GFNI-NEXT: andq %r10, %rbx
1456
- ; GFNI-NEXT: leaq (%rbx,%r14,4), %rbx
1457
- ; GFNI-NEXT: movabsq $6148914691236517205, %r14 # imm = 0x5555555555555555
1458
- ; GFNI-NEXT: movq %rbx, %r13
1459
- ; GFNI-NEXT: andq %r14, %r13
1460
- ; GFNI-NEXT: shrq %rbx
1461
- ; GFNI-NEXT: andq %r14, %rbx
1462
- ; GFNI-NEXT: leaq (%rbx,%r13,2), %rbx
1463
- ; GFNI-NEXT: shrdq $48, %rbx, %rdi
1464
- ; GFNI-NEXT: bswapq %r15
1465
- ; GFNI-NEXT: movq %r15, %r13
1466
- ; GFNI-NEXT: shrq $4, %r13
1467
- ; GFNI-NEXT: andq %r11, %r13
1468
- ; GFNI-NEXT: andq %r11, %r15
1469
- ; GFNI-NEXT: shlq $4, %r15
1470
- ; GFNI-NEXT: orq %r13, %r15
1471
- ; GFNI-NEXT: movq %r15, %r13
1472
- ; GFNI-NEXT: andq %r10, %r13
1473
- ; GFNI-NEXT: shrq $2, %r15
1474
- ; GFNI-NEXT: andq %r10, %r15
1475
- ; GFNI-NEXT: leaq (%r15,%r13,4), %r15
1476
- ; GFNI-NEXT: movq %r15, %r13
1477
- ; GFNI-NEXT: andq %r14, %r13
1478
- ; GFNI-NEXT: shrq %r15
1479
- ; GFNI-NEXT: andq %r14, %r15
1480
- ; GFNI-NEXT: leaq (%r15,%r13,2), %r15
1481
- ; GFNI-NEXT: shrdq $48, %r15, %rbx
1482
- ; GFNI-NEXT: bswapq %r12
1483
- ; GFNI-NEXT: movq %r12, %r13
1484
- ; GFNI-NEXT: shrq $4, %r13
1485
- ; GFNI-NEXT: andq %r11, %r13
1486
- ; GFNI-NEXT: andq %r11, %r12
1487
- ; GFNI-NEXT: shlq $4, %r12
1488
- ; GFNI-NEXT: orq %r13, %r12
1489
- ; GFNI-NEXT: movq %r12, %r13
1490
- ; GFNI-NEXT: andq %r10, %r13
1491
- ; GFNI-NEXT: shrq $2, %r12
1492
- ; GFNI-NEXT: andq %r10, %r12
1493
- ; GFNI-NEXT: leaq (%r12,%r13,4), %r12
1494
- ; GFNI-NEXT: movq %r12, %r13
1495
- ; GFNI-NEXT: andq %r14, %r13
1496
- ; GFNI-NEXT: shrq %r12
1497
- ; GFNI-NEXT: andq %r14, %r12
1498
- ; GFNI-NEXT: leaq (%r12,%r13,2), %r12
1499
- ; GFNI-NEXT: shrdq $48, %r12, %r15
1500
- ; GFNI-NEXT: bswapq %r9
1501
- ; GFNI-NEXT: movq %r9, %r13
1502
- ; GFNI-NEXT: shrq $4, %r13
1503
- ; GFNI-NEXT: andq %r11, %r13
1504
- ; GFNI-NEXT: andq %r11, %r9
1505
- ; GFNI-NEXT: shlq $4, %r9
1506
- ; GFNI-NEXT: orq %r13, %r9
1507
- ; GFNI-NEXT: movq %r9, %r13
1508
- ; GFNI-NEXT: andq %r10, %r13
1509
- ; GFNI-NEXT: shrq $2, %r9
1510
- ; GFNI-NEXT: andq %r10, %r9
1511
- ; GFNI-NEXT: leaq (%r9,%r13,4), %r9
1512
- ; GFNI-NEXT: movq %r9, %r13
1513
- ; GFNI-NEXT: andq %r14, %r13
1514
- ; GFNI-NEXT: shrq %r9
1515
- ; GFNI-NEXT: andq %r14, %r9
1516
- ; GFNI-NEXT: leaq (%r9,%r13,2), %r9
1517
- ; GFNI-NEXT: shrdq $48, %r9, %r12
1383
+ ; GFNI-NEXT: vmovq %r8, %xmm1
1384
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1385
+ ; GFNI-NEXT: vmovq %xmm1, %r8
1518
1386
; GFNI-NEXT: bswapq %r8
1519
- ; GFNI-NEXT: movq %r8, %r13
1520
- ; GFNI-NEXT: shrq $4, %r13
1521
- ; GFNI-NEXT: andq %r11, %r13
1522
- ; GFNI-NEXT: andq %r11, %r8
1523
- ; GFNI-NEXT: shlq $4, %r8
1524
- ; GFNI-NEXT: orq %r13, %r8
1525
- ; GFNI-NEXT: movq %r8, %r13
1526
- ; GFNI-NEXT: andq %r10, %r13
1527
- ; GFNI-NEXT: shrq $2, %r8
1528
- ; GFNI-NEXT: andq %r10, %r8
1529
- ; GFNI-NEXT: leaq (%r8,%r13,4), %r8
1530
- ; GFNI-NEXT: movq %r8, %r13
1531
- ; GFNI-NEXT: andq %r14, %r13
1532
- ; GFNI-NEXT: shrq %r8
1533
- ; GFNI-NEXT: andq %r14, %r8
1534
- ; GFNI-NEXT: leaq (%r8,%r13,2), %r8
1535
- ; GFNI-NEXT: shrdq $48, %r8, %r9
1387
+ ; GFNI-NEXT: movq %r8, %r9
1388
+ ; GFNI-NEXT: shldq $16, %rdi, %r9
1389
+ ; GFNI-NEXT: shldq $16, %r10, %rdi
1390
+ ; GFNI-NEXT: vmovq %rcx, %xmm1
1391
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1392
+ ; GFNI-NEXT: vmovq %xmm1, %rcx
1536
1393
; GFNI-NEXT: bswapq %rcx
1537
- ; GFNI-NEXT: movq %rcx, %r13
1538
- ; GFNI-NEXT: shrq $4, %r13
1539
- ; GFNI-NEXT: andq %r11, %r13
1540
- ; GFNI-NEXT: andq %r11, %rcx
1541
- ; GFNI-NEXT: shlq $4, %rcx
1542
- ; GFNI-NEXT: orq %r13, %rcx
1543
- ; GFNI-NEXT: movq %rcx, %r13
1544
- ; GFNI-NEXT: andq %r10, %r13
1545
- ; GFNI-NEXT: shrq $2, %rcx
1546
- ; GFNI-NEXT: andq %r10, %rcx
1547
- ; GFNI-NEXT: leaq (%rcx,%r13,4), %rcx
1548
- ; GFNI-NEXT: movq %rcx, %r13
1549
- ; GFNI-NEXT: andq %r14, %r13
1550
- ; GFNI-NEXT: shrq %rcx
1551
- ; GFNI-NEXT: andq %r14, %rcx
1552
- ; GFNI-NEXT: leaq (%rcx,%r13,2), %rcx
1553
1394
; GFNI-NEXT: shrdq $48, %rcx, %r8
1395
+ ; GFNI-NEXT: vmovq %rdx, %xmm1
1396
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1397
+ ; GFNI-NEXT: vmovq %xmm1, %rdx
1554
1398
; GFNI-NEXT: bswapq %rdx
1555
- ; GFNI-NEXT: movq %rdx, %r13
1556
- ; GFNI-NEXT: shrq $4, %r13
1557
- ; GFNI-NEXT: andq %r11, %r13
1558
- ; GFNI-NEXT: andq %r11, %rdx
1559
- ; GFNI-NEXT: shlq $4, %rdx
1560
- ; GFNI-NEXT: orq %r13, %rdx
1561
- ; GFNI-NEXT: movq %rdx, %r13
1562
- ; GFNI-NEXT: andq %r10, %r13
1563
- ; GFNI-NEXT: shrq $2, %rdx
1564
- ; GFNI-NEXT: andq %r10, %rdx
1565
- ; GFNI-NEXT: leaq (%rdx,%r13,4), %rdx
1566
- ; GFNI-NEXT: movq %rdx, %r13
1567
- ; GFNI-NEXT: andq %r14, %r13
1568
- ; GFNI-NEXT: shrq %rdx
1569
- ; GFNI-NEXT: andq %r14, %rdx
1570
- ; GFNI-NEXT: leaq (%rdx,%r13,2), %rdx
1571
1399
; GFNI-NEXT: shrdq $48, %rdx, %rcx
1400
+ ; GFNI-NEXT: vmovq %rsi, %xmm1
1401
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1402
+ ; GFNI-NEXT: vmovq %xmm1, %rsi
1572
1403
; GFNI-NEXT: bswapq %rsi
1573
- ; GFNI-NEXT: movq %rsi, %r13
1574
- ; GFNI-NEXT: shrq $4, %r13
1575
- ; GFNI-NEXT: andq %r11, %r13
1576
- ; GFNI-NEXT: andq %r11, %rsi
1577
- ; GFNI-NEXT: shlq $4, %rsi
1578
- ; GFNI-NEXT: orq %r13, %rsi
1579
- ; GFNI-NEXT: movq %rsi, %r11
1580
- ; GFNI-NEXT: andq %r10, %r11
1581
- ; GFNI-NEXT: shrq $2, %rsi
1582
- ; GFNI-NEXT: andq %r10, %rsi
1583
- ; GFNI-NEXT: leaq (%rsi,%r11,4), %rsi
1584
- ; GFNI-NEXT: movq %rsi, %r10
1585
- ; GFNI-NEXT: andq %r14, %r10
1586
- ; GFNI-NEXT: shrq %rsi
1587
- ; GFNI-NEXT: andq %r14, %rsi
1588
- ; GFNI-NEXT: leaq (%rsi,%r10,2), %rsi
1589
1404
; GFNI-NEXT: shrdq $48, %rsi, %rdx
1405
+ ; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1406
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1407
+ ; GFNI-NEXT: vmovq %xmm1, %r11
1408
+ ; GFNI-NEXT: bswapq %r11
1409
+ ; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1410
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1411
+ ; GFNI-NEXT: vmovq %xmm1, %rbx
1412
+ ; GFNI-NEXT: bswapq %rbx
1413
+ ; GFNI-NEXT: shrdq $48, %rbx, %r11
1414
+ ; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1415
+ ; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
1416
+ ; GFNI-NEXT: vmovq %xmm0, %r14
1417
+ ; GFNI-NEXT: bswapq %r14
1418
+ ; GFNI-NEXT: shrdq $48, %r14, %rbx
1419
+ ; GFNI-NEXT: shrdq $48, %r10, %r14
1590
1420
; GFNI-NEXT: shrq $48, %rsi
1421
+ ; GFNI-NEXT: movq %r14, 16(%rax)
1422
+ ; GFNI-NEXT: movq %rbx, 8(%rax)
1423
+ ; GFNI-NEXT: movq %r11, (%rax)
1591
1424
; GFNI-NEXT: movq %rdx, 56(%rax)
1592
1425
; GFNI-NEXT: movq %rcx, 48(%rax)
1593
1426
; GFNI-NEXT: movq %r8, 40(%rax)
1594
1427
; GFNI-NEXT: movq %r9, 32(%rax)
1595
- ; GFNI-NEXT: movq %r12, 24(%rax)
1596
- ; GFNI-NEXT: movq %r15, 16(%rax)
1597
- ; GFNI-NEXT: movq %rbx, 8(%rax)
1598
- ; GFNI-NEXT: movq %rdi, (%rax)
1428
+ ; GFNI-NEXT: movq %rdi, 24(%rax)
1599
1429
; GFNI-NEXT: movw %si, 64(%rax)
1600
1430
; GFNI-NEXT: popq %rbx
1601
- ; GFNI-NEXT: popq %r12
1602
- ; GFNI-NEXT: popq %r13
1603
1431
; GFNI-NEXT: popq %r14
1604
- ; GFNI-NEXT: popq %r15
1605
1432
; GFNI-NEXT: retq
1606
1433
%Z = call i528 @llvm.bitreverse.i528 (i528 %A )
1607
1434
ret i528 %Z
0 commit comments