Skip to content

Commit e3ef461

Browse files
shamithoke and shami authored
Perform bitreverse using AVX512 GFNI for i32 and i64. (#81764)
Currently, the lowering operation for bitreverse using Intel AVX512 GFNI only supports byte vectors. Extend the operation to i32 and i64. --------- Co-authored-by: shami <[email protected]>
1 parent ca6b846 commit e3ef461

File tree

3 files changed

+84
-265
lines changed

3 files changed

+84
-265
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,6 +1496,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
14961496
setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
14971497
setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
14981498

1499+
if (Subtarget.hasGFNI()) {
1500+
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
1501+
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
1502+
}
1503+
14991504
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
15001505
setOperationAction(ISD::SETCC, VT, Custom);
15011506
setOperationAction(ISD::CTPOP, VT, Custom);
@@ -31332,6 +31337,23 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3133231337
if (VT.is256BitVector() && !Subtarget.hasInt256())
3133331338
return splitVectorIntUnary(Op, DAG, DL);
3133431339

31340+
// Lower i32/i64 to GFNI as vXi8 BITREVERSE + BSWAP
31341+
if (!VT.isVector()) {
31342+
31343+
assert((VT.getScalarType() == MVT::i32) ||
31344+
(VT.getScalarType() == MVT::i64));
31345+
31346+
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
31347+
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
31348+
Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
31349+
DAG.getBitcast(MVT::v16i8, Res));
31350+
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
31351+
DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
31352+
return DAG.getNode(ISD::BSWAP, DL, VT, Res);
31353+
}
31354+
31355+
assert(VT.isVector() && VT.getSizeInBits() >= 128);
31356+
3133531357
// Lower vXi16/vXi32/vXi64 as BSWAP + vXi8 BITREVERSE.
3133631358
if (VT.getScalarType() != MVT::i8) {
3133731359
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);

llvm/test/CodeGen/X86/bitreverse.ll

Lines changed: 54 additions & 227 deletions
Original file line numberDiff line numberDiff line change
@@ -172,26 +172,10 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
172172
;
173173
; GFNI-LABEL: test_bitreverse_i64:
174174
; GFNI: # %bb.0:
175-
; GFNI-NEXT: bswapq %rdi
176-
; GFNI-NEXT: movq %rdi, %rax
177-
; GFNI-NEXT: shrq $4, %rax
178-
; GFNI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
179-
; GFNI-NEXT: andq %rcx, %rax
180-
; GFNI-NEXT: andq %rcx, %rdi
181-
; GFNI-NEXT: shlq $4, %rdi
182-
; GFNI-NEXT: orq %rax, %rdi
183-
; GFNI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
184-
; GFNI-NEXT: movq %rdi, %rcx
185-
; GFNI-NEXT: andq %rax, %rcx
186-
; GFNI-NEXT: shrq $2, %rdi
187-
; GFNI-NEXT: andq %rax, %rdi
188-
; GFNI-NEXT: leaq (%rdi,%rcx,4), %rax
189-
; GFNI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
190-
; GFNI-NEXT: movq %rax, %rdx
191-
; GFNI-NEXT: andq %rcx, %rdx
192-
; GFNI-NEXT: shrq %rax
193-
; GFNI-NEXT: andq %rcx, %rax
194-
; GFNI-NEXT: leaq (%rax,%rdx,2), %rax
175+
; GFNI-NEXT: vmovq %rdi, %xmm0
176+
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
177+
; GFNI-NEXT: vmovq %xmm0, %rax
178+
; GFNI-NEXT: bswapq %rax
195179
; GFNI-NEXT: retq
196180
%b = call i64 @llvm.bitreverse.i64(i64 %a)
197181
ret i64 %b
@@ -253,24 +237,10 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
253237
;
254238
; GFNI-LABEL: test_bitreverse_i32:
255239
; GFNI: # %bb.0:
256-
; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
257-
; GFNI-NEXT: bswapl %edi
258-
; GFNI-NEXT: movl %edi, %eax
259-
; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
260-
; GFNI-NEXT: shll $4, %eax
261-
; GFNI-NEXT: shrl $4, %edi
262-
; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
263-
; GFNI-NEXT: orl %eax, %edi
264-
; GFNI-NEXT: movl %edi, %eax
265-
; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333
266-
; GFNI-NEXT: shrl $2, %edi
267-
; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333
268-
; GFNI-NEXT: leal (%rdi,%rax,4), %eax
269-
; GFNI-NEXT: movl %eax, %ecx
270-
; GFNI-NEXT: andl $1431655765, %ecx # imm = 0x55555555
271-
; GFNI-NEXT: shrl %eax
272-
; GFNI-NEXT: andl $1431655765, %eax # imm = 0x55555555
273-
; GFNI-NEXT: leal (%rax,%rcx,2), %eax
240+
; GFNI-NEXT: vmovd %edi, %xmm0
241+
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
242+
; GFNI-NEXT: vmovd %xmm0, %eax
243+
; GFNI-NEXT: bswapl %eax
274244
; GFNI-NEXT: retq
275245
%b = call i32 @llvm.bitreverse.i32(i32 %a)
276246
ret i32 %b
@@ -335,24 +305,10 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
335305
;
336306
; GFNI-LABEL: test_bitreverse_i24:
337307
; GFNI: # %bb.0:
338-
; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
339-
; GFNI-NEXT: bswapl %edi
340-
; GFNI-NEXT: movl %edi, %eax
341-
; GFNI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
342-
; GFNI-NEXT: shll $4, %eax
343-
; GFNI-NEXT: shrl $4, %edi
344-
; GFNI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
345-
; GFNI-NEXT: orl %eax, %edi
346-
; GFNI-NEXT: movl %edi, %eax
347-
; GFNI-NEXT: andl $858993459, %eax # imm = 0x33333333
348-
; GFNI-NEXT: shrl $2, %edi
349-
; GFNI-NEXT: andl $858993459, %edi # imm = 0x33333333
350-
; GFNI-NEXT: leal (%rdi,%rax,4), %eax
351-
; GFNI-NEXT: movl %eax, %ecx
352-
; GFNI-NEXT: andl $1431655680, %ecx # imm = 0x55555500
353-
; GFNI-NEXT: shrl %eax
354-
; GFNI-NEXT: andl $1431655680, %eax # imm = 0x55555500
355-
; GFNI-NEXT: leal (%rax,%rcx,2), %eax
308+
; GFNI-NEXT: vmovd %edi, %xmm0
309+
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
310+
; GFNI-NEXT: vmovd %xmm0, %eax
311+
; GFNI-NEXT: bswapl %eax
356312
; GFNI-NEXT: shrl $8, %eax
357313
; GFNI-NEXT: retq
358314
%b = call i24 @llvm.bitreverse.i24(i24 %a)
@@ -1412,196 +1368,67 @@ define i528 @large_promotion(i528 %A) nounwind {
14121368
;
14131369
; GFNI-LABEL: large_promotion:
14141370
; GFNI: # %bb.0:
1415-
; GFNI-NEXT: pushq %r15
14161371
; GFNI-NEXT: pushq %r14
1417-
; GFNI-NEXT: pushq %r13
1418-
; GFNI-NEXT: pushq %r12
14191372
; GFNI-NEXT: pushq %rbx
14201373
; GFNI-NEXT: movq %rdi, %rax
1421-
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r12
1422-
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %r15
1423-
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rbx
1424-
; GFNI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
1374+
; GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9241421688590303745,9241421688590303745]
1375+
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1376+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1377+
; GFNI-NEXT: vmovq %xmm1, %r10
1378+
; GFNI-NEXT: bswapq %r10
1379+
; GFNI-NEXT: vmovq %r9, %xmm1
1380+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1381+
; GFNI-NEXT: vmovq %xmm1, %rdi
14251382
; GFNI-NEXT: bswapq %rdi
1426-
; GFNI-NEXT: movq %rdi, %r10
1427-
; GFNI-NEXT: shrq $4, %r10
1428-
; GFNI-NEXT: movabsq $1085102592571150095, %r11 # imm = 0xF0F0F0F0F0F0F0F
1429-
; GFNI-NEXT: andq %r11, %r10
1430-
; GFNI-NEXT: andq %r11, %rdi
1431-
; GFNI-NEXT: shlq $4, %rdi
1432-
; GFNI-NEXT: orq %r10, %rdi
1433-
; GFNI-NEXT: movabsq $3689348814741910323, %r10 # imm = 0x3333333333333333
1434-
; GFNI-NEXT: movq %rdi, %r14
1435-
; GFNI-NEXT: andq %r10, %r14
1436-
; GFNI-NEXT: shrq $2, %rdi
1437-
; GFNI-NEXT: andq %r10, %rdi
1438-
; GFNI-NEXT: leaq (%rdi,%r14,4), %rdi
1439-
; GFNI-NEXT: movabsq $6148820866244280320, %r14 # imm = 0x5555000000000000
1440-
; GFNI-NEXT: movq %rdi, %r13
1441-
; GFNI-NEXT: andq %r14, %r13
1442-
; GFNI-NEXT: shrq %rdi
1443-
; GFNI-NEXT: andq %r14, %rdi
1444-
; GFNI-NEXT: leaq (%rdi,%r13,2), %rdi
1445-
; GFNI-NEXT: bswapq %rbx
1446-
; GFNI-NEXT: movq %rbx, %r14
1447-
; GFNI-NEXT: shrq $4, %r14
1448-
; GFNI-NEXT: andq %r11, %r14
1449-
; GFNI-NEXT: andq %r11, %rbx
1450-
; GFNI-NEXT: shlq $4, %rbx
1451-
; GFNI-NEXT: orq %r14, %rbx
1452-
; GFNI-NEXT: movq %rbx, %r14
1453-
; GFNI-NEXT: andq %r10, %r14
1454-
; GFNI-NEXT: shrq $2, %rbx
1455-
; GFNI-NEXT: andq %r10, %rbx
1456-
; GFNI-NEXT: leaq (%rbx,%r14,4), %rbx
1457-
; GFNI-NEXT: movabsq $6148914691236517205, %r14 # imm = 0x5555555555555555
1458-
; GFNI-NEXT: movq %rbx, %r13
1459-
; GFNI-NEXT: andq %r14, %r13
1460-
; GFNI-NEXT: shrq %rbx
1461-
; GFNI-NEXT: andq %r14, %rbx
1462-
; GFNI-NEXT: leaq (%rbx,%r13,2), %rbx
1463-
; GFNI-NEXT: shrdq $48, %rbx, %rdi
1464-
; GFNI-NEXT: bswapq %r15
1465-
; GFNI-NEXT: movq %r15, %r13
1466-
; GFNI-NEXT: shrq $4, %r13
1467-
; GFNI-NEXT: andq %r11, %r13
1468-
; GFNI-NEXT: andq %r11, %r15
1469-
; GFNI-NEXT: shlq $4, %r15
1470-
; GFNI-NEXT: orq %r13, %r15
1471-
; GFNI-NEXT: movq %r15, %r13
1472-
; GFNI-NEXT: andq %r10, %r13
1473-
; GFNI-NEXT: shrq $2, %r15
1474-
; GFNI-NEXT: andq %r10, %r15
1475-
; GFNI-NEXT: leaq (%r15,%r13,4), %r15
1476-
; GFNI-NEXT: movq %r15, %r13
1477-
; GFNI-NEXT: andq %r14, %r13
1478-
; GFNI-NEXT: shrq %r15
1479-
; GFNI-NEXT: andq %r14, %r15
1480-
; GFNI-NEXT: leaq (%r15,%r13,2), %r15
1481-
; GFNI-NEXT: shrdq $48, %r15, %rbx
1482-
; GFNI-NEXT: bswapq %r12
1483-
; GFNI-NEXT: movq %r12, %r13
1484-
; GFNI-NEXT: shrq $4, %r13
1485-
; GFNI-NEXT: andq %r11, %r13
1486-
; GFNI-NEXT: andq %r11, %r12
1487-
; GFNI-NEXT: shlq $4, %r12
1488-
; GFNI-NEXT: orq %r13, %r12
1489-
; GFNI-NEXT: movq %r12, %r13
1490-
; GFNI-NEXT: andq %r10, %r13
1491-
; GFNI-NEXT: shrq $2, %r12
1492-
; GFNI-NEXT: andq %r10, %r12
1493-
; GFNI-NEXT: leaq (%r12,%r13,4), %r12
1494-
; GFNI-NEXT: movq %r12, %r13
1495-
; GFNI-NEXT: andq %r14, %r13
1496-
; GFNI-NEXT: shrq %r12
1497-
; GFNI-NEXT: andq %r14, %r12
1498-
; GFNI-NEXT: leaq (%r12,%r13,2), %r12
1499-
; GFNI-NEXT: shrdq $48, %r12, %r15
1500-
; GFNI-NEXT: bswapq %r9
1501-
; GFNI-NEXT: movq %r9, %r13
1502-
; GFNI-NEXT: shrq $4, %r13
1503-
; GFNI-NEXT: andq %r11, %r13
1504-
; GFNI-NEXT: andq %r11, %r9
1505-
; GFNI-NEXT: shlq $4, %r9
1506-
; GFNI-NEXT: orq %r13, %r9
1507-
; GFNI-NEXT: movq %r9, %r13
1508-
; GFNI-NEXT: andq %r10, %r13
1509-
; GFNI-NEXT: shrq $2, %r9
1510-
; GFNI-NEXT: andq %r10, %r9
1511-
; GFNI-NEXT: leaq (%r9,%r13,4), %r9
1512-
; GFNI-NEXT: movq %r9, %r13
1513-
; GFNI-NEXT: andq %r14, %r13
1514-
; GFNI-NEXT: shrq %r9
1515-
; GFNI-NEXT: andq %r14, %r9
1516-
; GFNI-NEXT: leaq (%r9,%r13,2), %r9
1517-
; GFNI-NEXT: shrdq $48, %r9, %r12
1383+
; GFNI-NEXT: vmovq %r8, %xmm1
1384+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1385+
; GFNI-NEXT: vmovq %xmm1, %r8
15181386
; GFNI-NEXT: bswapq %r8
1519-
; GFNI-NEXT: movq %r8, %r13
1520-
; GFNI-NEXT: shrq $4, %r13
1521-
; GFNI-NEXT: andq %r11, %r13
1522-
; GFNI-NEXT: andq %r11, %r8
1523-
; GFNI-NEXT: shlq $4, %r8
1524-
; GFNI-NEXT: orq %r13, %r8
1525-
; GFNI-NEXT: movq %r8, %r13
1526-
; GFNI-NEXT: andq %r10, %r13
1527-
; GFNI-NEXT: shrq $2, %r8
1528-
; GFNI-NEXT: andq %r10, %r8
1529-
; GFNI-NEXT: leaq (%r8,%r13,4), %r8
1530-
; GFNI-NEXT: movq %r8, %r13
1531-
; GFNI-NEXT: andq %r14, %r13
1532-
; GFNI-NEXT: shrq %r8
1533-
; GFNI-NEXT: andq %r14, %r8
1534-
; GFNI-NEXT: leaq (%r8,%r13,2), %r8
1535-
; GFNI-NEXT: shrdq $48, %r8, %r9
1387+
; GFNI-NEXT: movq %r8, %r9
1388+
; GFNI-NEXT: shldq $16, %rdi, %r9
1389+
; GFNI-NEXT: shldq $16, %r10, %rdi
1390+
; GFNI-NEXT: vmovq %rcx, %xmm1
1391+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1392+
; GFNI-NEXT: vmovq %xmm1, %rcx
15361393
; GFNI-NEXT: bswapq %rcx
1537-
; GFNI-NEXT: movq %rcx, %r13
1538-
; GFNI-NEXT: shrq $4, %r13
1539-
; GFNI-NEXT: andq %r11, %r13
1540-
; GFNI-NEXT: andq %r11, %rcx
1541-
; GFNI-NEXT: shlq $4, %rcx
1542-
; GFNI-NEXT: orq %r13, %rcx
1543-
; GFNI-NEXT: movq %rcx, %r13
1544-
; GFNI-NEXT: andq %r10, %r13
1545-
; GFNI-NEXT: shrq $2, %rcx
1546-
; GFNI-NEXT: andq %r10, %rcx
1547-
; GFNI-NEXT: leaq (%rcx,%r13,4), %rcx
1548-
; GFNI-NEXT: movq %rcx, %r13
1549-
; GFNI-NEXT: andq %r14, %r13
1550-
; GFNI-NEXT: shrq %rcx
1551-
; GFNI-NEXT: andq %r14, %rcx
1552-
; GFNI-NEXT: leaq (%rcx,%r13,2), %rcx
15531394
; GFNI-NEXT: shrdq $48, %rcx, %r8
1395+
; GFNI-NEXT: vmovq %rdx, %xmm1
1396+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1397+
; GFNI-NEXT: vmovq %xmm1, %rdx
15541398
; GFNI-NEXT: bswapq %rdx
1555-
; GFNI-NEXT: movq %rdx, %r13
1556-
; GFNI-NEXT: shrq $4, %r13
1557-
; GFNI-NEXT: andq %r11, %r13
1558-
; GFNI-NEXT: andq %r11, %rdx
1559-
; GFNI-NEXT: shlq $4, %rdx
1560-
; GFNI-NEXT: orq %r13, %rdx
1561-
; GFNI-NEXT: movq %rdx, %r13
1562-
; GFNI-NEXT: andq %r10, %r13
1563-
; GFNI-NEXT: shrq $2, %rdx
1564-
; GFNI-NEXT: andq %r10, %rdx
1565-
; GFNI-NEXT: leaq (%rdx,%r13,4), %rdx
1566-
; GFNI-NEXT: movq %rdx, %r13
1567-
; GFNI-NEXT: andq %r14, %r13
1568-
; GFNI-NEXT: shrq %rdx
1569-
; GFNI-NEXT: andq %r14, %rdx
1570-
; GFNI-NEXT: leaq (%rdx,%r13,2), %rdx
15711399
; GFNI-NEXT: shrdq $48, %rdx, %rcx
1400+
; GFNI-NEXT: vmovq %rsi, %xmm1
1401+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1402+
; GFNI-NEXT: vmovq %xmm1, %rsi
15721403
; GFNI-NEXT: bswapq %rsi
1573-
; GFNI-NEXT: movq %rsi, %r13
1574-
; GFNI-NEXT: shrq $4, %r13
1575-
; GFNI-NEXT: andq %r11, %r13
1576-
; GFNI-NEXT: andq %r11, %rsi
1577-
; GFNI-NEXT: shlq $4, %rsi
1578-
; GFNI-NEXT: orq %r13, %rsi
1579-
; GFNI-NEXT: movq %rsi, %r11
1580-
; GFNI-NEXT: andq %r10, %r11
1581-
; GFNI-NEXT: shrq $2, %rsi
1582-
; GFNI-NEXT: andq %r10, %rsi
1583-
; GFNI-NEXT: leaq (%rsi,%r11,4), %rsi
1584-
; GFNI-NEXT: movq %rsi, %r10
1585-
; GFNI-NEXT: andq %r14, %r10
1586-
; GFNI-NEXT: shrq %rsi
1587-
; GFNI-NEXT: andq %r14, %rsi
1588-
; GFNI-NEXT: leaq (%rsi,%r10,2), %rsi
15891404
; GFNI-NEXT: shrdq $48, %rsi, %rdx
1405+
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1406+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1407+
; GFNI-NEXT: vmovq %xmm1, %r11
1408+
; GFNI-NEXT: bswapq %r11
1409+
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1410+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
1411+
; GFNI-NEXT: vmovq %xmm1, %rbx
1412+
; GFNI-NEXT: bswapq %rbx
1413+
; GFNI-NEXT: shrdq $48, %rbx, %r11
1414+
; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1415+
; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
1416+
; GFNI-NEXT: vmovq %xmm0, %r14
1417+
; GFNI-NEXT: bswapq %r14
1418+
; GFNI-NEXT: shrdq $48, %r14, %rbx
1419+
; GFNI-NEXT: shrdq $48, %r10, %r14
15901420
; GFNI-NEXT: shrq $48, %rsi
1421+
; GFNI-NEXT: movq %r14, 16(%rax)
1422+
; GFNI-NEXT: movq %rbx, 8(%rax)
1423+
; GFNI-NEXT: movq %r11, (%rax)
15911424
; GFNI-NEXT: movq %rdx, 56(%rax)
15921425
; GFNI-NEXT: movq %rcx, 48(%rax)
15931426
; GFNI-NEXT: movq %r8, 40(%rax)
15941427
; GFNI-NEXT: movq %r9, 32(%rax)
1595-
; GFNI-NEXT: movq %r12, 24(%rax)
1596-
; GFNI-NEXT: movq %r15, 16(%rax)
1597-
; GFNI-NEXT: movq %rbx, 8(%rax)
1598-
; GFNI-NEXT: movq %rdi, (%rax)
1428+
; GFNI-NEXT: movq %rdi, 24(%rax)
15991429
; GFNI-NEXT: movw %si, 64(%rax)
16001430
; GFNI-NEXT: popq %rbx
1601-
; GFNI-NEXT: popq %r12
1602-
; GFNI-NEXT: popq %r13
16031431
; GFNI-NEXT: popq %r14
1604-
; GFNI-NEXT: popq %r15
16051432
; GFNI-NEXT: retq
16061433
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
16071434
ret i528 %Z

0 commit comments

Comments
 (0)