Skip to content

Commit f5dbf66

Browse files
author
shami
committed
Add GFNI-based scalar i32/i64 BITREVERSE lowering after restructuring the vector lowering code.
1 parent bcde4ac commit f5dbf66

File tree

2 files changed

+38
-145
lines changed

2 files changed

+38
-145
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,6 +1496,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
14961496
setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
14971497
setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
14981498

1499+
if (Subtarget.hasGFNI()) {
1500+
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
1501+
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
1502+
}
1503+
14991504
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
15001505
setOperationAction(ISD::SETCC, VT, Custom);
15011506
setOperationAction(ISD::CTPOP, VT, Custom);
@@ -31312,53 +31317,18 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
3131231317
return DAG.getBitcast(VT, Res);
3131331318
}
3131431319

31315-
static void createBSWAPShuffleMask(EVT VT, SmallVector<int, 16>& ShuffleMask) {
31316-
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
31317-
for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
31318-
for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
31319-
ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
31320-
}
31321-
3132231320
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3132331321
SelectionDAG &DAG) {
3132431322
MVT VT = Op.getSimpleValueType();
31325-
SDValue In = Op.getOperand(0);
31326-
SDLoc DL(Op);
31327-
31328-
auto HasGFNI = Subtarget.hasGFNI();
31329-
auto ScalarType = VT.getScalarType();
31330-
31331-
if (HasGFNI && ((ScalarType == MVT::i32) || (ScalarType == MVT::i64))) {
31332-
if (VT.isVector()) {
31333-
SmallVector<int, 16> BSWAPMask = createBSWAPShuffleMask(VT);
31334-
MVT ByteVT =
31335-
EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
31336-
SDValue VecShuffle = DAG.getVectorShuffle(
31337-
ByteVT, DL, DAG.getNode(ISD::BITCAST, DL, ByteVT, In),
31338-
DAG.getUNDEF(ByteVT), BSWAPMask);
31339-
SDValue BitReverse = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, VecShuffle);
31340-
return DAG.getBitcast(VT, BitReverse);
31341-
} else {
31342-
auto CastTo = ScalarType == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
31343-
SDValue ScalarToVector =
31344-
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, CastTo, In);
31345-
SDValue BitReverse =
31346-
DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
31347-
DAG.getBitcast(MVT::v16i8, ScalarToVector));
31348-
SDValue ExtractElementZero = DAG.getNode(
31349-
ISD::EXTRACT_VECTOR_ELT, DL, ScalarType,
31350-
DAG.getBitcast(CastTo, BitReverse), DAG.getIntPtrConstant(0, DL));
31351-
return DAG.getNode(ISD::BSWAP, DL, ScalarType, ExtractElementZero);
31352-
}
31353-
}
31354-
31355-
// Split v64i8 without BWI so that we can still use the PSHUFB lowering.
3135631323

3135731324
if (Subtarget.hasXOP() && !VT.is512BitVector())
3135831325
return LowerBITREVERSE_XOP(Op, DAG);
3135931326

3136031327
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
3136131328

31329+
SDValue In = Op.getOperand(0);
31330+
SDLoc DL(Op);
31331+
3136231332
// Split 512-bit ops without BWI so that we can still use the PSHUFB lowering.
3136331333
if (VT.is512BitVector() && !Subtarget.hasBWI())
3136431334
return splitVectorIntUnary(Op, DAG, DL);
@@ -31367,6 +31337,21 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3136731337
if (VT.is256BitVector() && !Subtarget.hasInt256())
3136831338
return splitVectorIntUnary(Op, DAG, DL);
3136931339

31340+
// Lower scalar i32/i64 with GFNI: SCALAR_TO_VECTOR (V = v4i32/v2i64) -> v16i8 BITREVERSE -> extract V[0] -> BSWAP.
31341+
if (Subtarget.hasGFNI() && !VT.isVector()) {
31342+
31343+
assert ((VT.getScalarType() == MVT::i32) || (VT.getScalarType() == MVT::i64));
31344+
31345+
auto ScalarType = VT.getScalarType();
31346+
auto CastTo = ScalarType == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
31347+
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, CastTo, In);
31348+
Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8, DAG.getBitcast(MVT::v16i8, Res));
31349+
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarType, DAG.getBitcast(CastTo, Res), DAG.getIntPtrConstant(0, DL));
31350+
return DAG.getNode(ISD::BSWAP, DL, ScalarType, Res);
31351+
}
31352+
31353+
assert (VT.isVector() && VT.getSizeInBits() >= 128);
31354+
3137031355
// Lower vXi16/vXi32/vXi64 as BSWAP + vXi8 BITREVERSE.
3137131356
if (VT.getScalarType() != MVT::i8) {
3137231357
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
@@ -31381,7 +31366,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3138131366
unsigned NumElts = VT.getVectorNumElements();
3138231367

3138331368
// If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits.
31384-
if (HasGFNI) {
31369+
if (Subtarget.hasGFNI()) {
3138531370
MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
3138631371
SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
3138731372
Matrix = DAG.getBitcast(VT, Matrix);

llvm/test/CodeGen/X86/vector-bitreverse.ll

Lines changed: 14 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -274,57 +274,13 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
274274
; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax
275275
; GFNISSE-NEXT: retq
276276
;
277-
; GFNIAVX1-LABEL: test_bitreverse_i32:
278-
; GFNIAVX1: # %bb.0:
279-
; GFNIAVX1-NEXT: # kill: def $edi killed $edi def $rdi
280-
; GFNIAVX1-NEXT: bswapl %edi
281-
; GFNIAVX1-NEXT: movl %edi, %eax
282-
; GFNIAVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
283-
; GFNIAVX1-NEXT: shll $4, %eax
284-
; GFNIAVX1-NEXT: shrl $4, %edi
285-
; GFNIAVX1-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
286-
; GFNIAVX1-NEXT: orl %eax, %edi
287-
; GFNIAVX1-NEXT: movl %edi, %eax
288-
; GFNIAVX1-NEXT: andl $858993459, %eax # imm = 0x33333333
289-
; GFNIAVX1-NEXT: shrl $2, %edi
290-
; GFNIAVX1-NEXT: andl $858993459, %edi # imm = 0x33333333
291-
; GFNIAVX1-NEXT: leal (%rdi,%rax,4), %eax
292-
; GFNIAVX1-NEXT: movl %eax, %ecx
293-
; GFNIAVX1-NEXT: andl $1431655765, %ecx # imm = 0x55555555
294-
; GFNIAVX1-NEXT: shrl %eax
295-
; GFNIAVX1-NEXT: andl $1431655765, %eax # imm = 0x55555555
296-
; GFNIAVX1-NEXT: leal (%rax,%rcx,2), %eax
297-
; GFNIAVX1-NEXT: retq
298-
;
299-
; GFNIAVX2-LABEL: test_bitreverse_i32:
300-
; GFNIAVX2: # %bb.0:
301-
; GFNIAVX2-NEXT: # kill: def $edi killed $edi def $rdi
302-
; GFNIAVX2-NEXT: bswapl %edi
303-
; GFNIAVX2-NEXT: movl %edi, %eax
304-
; GFNIAVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
305-
; GFNIAVX2-NEXT: shll $4, %eax
306-
; GFNIAVX2-NEXT: shrl $4, %edi
307-
; GFNIAVX2-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
308-
; GFNIAVX2-NEXT: orl %eax, %edi
309-
; GFNIAVX2-NEXT: movl %edi, %eax
310-
; GFNIAVX2-NEXT: andl $858993459, %eax # imm = 0x33333333
311-
; GFNIAVX2-NEXT: shrl $2, %edi
312-
; GFNIAVX2-NEXT: andl $858993459, %edi # imm = 0x33333333
313-
; GFNIAVX2-NEXT: leal (%rdi,%rax,4), %eax
314-
; GFNIAVX2-NEXT: movl %eax, %ecx
315-
; GFNIAVX2-NEXT: andl $1431655765, %ecx # imm = 0x55555555
316-
; GFNIAVX2-NEXT: shrl %eax
317-
; GFNIAVX2-NEXT: andl $1431655765, %eax # imm = 0x55555555
318-
; GFNIAVX2-NEXT: leal (%rax,%rcx,2), %eax
319-
; GFNIAVX2-NEXT: retq
320-
;
321-
; GFNIAVX512-LABEL: test_bitreverse_i32:
322-
; GFNIAVX512: # %bb.0:
323-
; GFNIAVX512-NEXT: vmovd %edi, %xmm0
324-
; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
325-
; GFNIAVX512-NEXT: vmovd %xmm0, %eax
326-
; GFNIAVX512-NEXT: bswapl %eax
327-
; GFNIAVX512-NEXT: retq
277+
; GFNIAVX-LABEL: test_bitreverse_i32:
278+
; GFNIAVX: # %bb.0:
279+
; GFNIAVX-NEXT: vmovd %edi, %xmm0
280+
; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
281+
; GFNIAVX-NEXT: vmovd %xmm0, %eax
282+
; GFNIAVX-NEXT: bswapl %eax
283+
; GFNIAVX-NEXT: retq
328284
%b = call i32 @llvm.bitreverse.i32(i32 %a)
329285
ret i32 %b
330286
}
@@ -409,61 +365,13 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
409365
; GFNISSE-NEXT: leaq (%rax,%rdx,2), %rax
410366
; GFNISSE-NEXT: retq
411367
;
412-
; GFNIAVX1-LABEL: test_bitreverse_i64:
413-
; GFNIAVX1: # %bb.0:
414-
; GFNIAVX1-NEXT: bswapq %rdi
415-
; GFNIAVX1-NEXT: movq %rdi, %rax
416-
; GFNIAVX1-NEXT: shrq $4, %rax
417-
; GFNIAVX1-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
418-
; GFNIAVX1-NEXT: andq %rcx, %rax
419-
; GFNIAVX1-NEXT: andq %rcx, %rdi
420-
; GFNIAVX1-NEXT: shlq $4, %rdi
421-
; GFNIAVX1-NEXT: orq %rax, %rdi
422-
; GFNIAVX1-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
423-
; GFNIAVX1-NEXT: movq %rdi, %rcx
424-
; GFNIAVX1-NEXT: andq %rax, %rcx
425-
; GFNIAVX1-NEXT: shrq $2, %rdi
426-
; GFNIAVX1-NEXT: andq %rax, %rdi
427-
; GFNIAVX1-NEXT: leaq (%rdi,%rcx,4), %rax
428-
; GFNIAVX1-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
429-
; GFNIAVX1-NEXT: movq %rax, %rdx
430-
; GFNIAVX1-NEXT: andq %rcx, %rdx
431-
; GFNIAVX1-NEXT: shrq %rax
432-
; GFNIAVX1-NEXT: andq %rcx, %rax
433-
; GFNIAVX1-NEXT: leaq (%rax,%rdx,2), %rax
434-
; GFNIAVX1-NEXT: retq
435-
;
436-
; GFNIAVX2-LABEL: test_bitreverse_i64:
437-
; GFNIAVX2: # %bb.0:
438-
; GFNIAVX2-NEXT: bswapq %rdi
439-
; GFNIAVX2-NEXT: movq %rdi, %rax
440-
; GFNIAVX2-NEXT: shrq $4, %rax
441-
; GFNIAVX2-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
442-
; GFNIAVX2-NEXT: andq %rcx, %rax
443-
; GFNIAVX2-NEXT: andq %rcx, %rdi
444-
; GFNIAVX2-NEXT: shlq $4, %rdi
445-
; GFNIAVX2-NEXT: orq %rax, %rdi
446-
; GFNIAVX2-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
447-
; GFNIAVX2-NEXT: movq %rdi, %rcx
448-
; GFNIAVX2-NEXT: andq %rax, %rcx
449-
; GFNIAVX2-NEXT: shrq $2, %rdi
450-
; GFNIAVX2-NEXT: andq %rax, %rdi
451-
; GFNIAVX2-NEXT: leaq (%rdi,%rcx,4), %rax
452-
; GFNIAVX2-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
453-
; GFNIAVX2-NEXT: movq %rax, %rdx
454-
; GFNIAVX2-NEXT: andq %rcx, %rdx
455-
; GFNIAVX2-NEXT: shrq %rax
456-
; GFNIAVX2-NEXT: andq %rcx, %rax
457-
; GFNIAVX2-NEXT: leaq (%rax,%rdx,2), %rax
458-
; GFNIAVX2-NEXT: retq
459-
;
460-
; GFNIAVX512-LABEL: test_bitreverse_i64:
461-
; GFNIAVX512: # %bb.0:
462-
; GFNIAVX512-NEXT: vmovq %rdi, %xmm0
463-
; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
464-
; GFNIAVX512-NEXT: vmovq %xmm0, %rax
465-
; GFNIAVX512-NEXT: bswapq %rax
466-
; GFNIAVX512-NEXT: retq
368+
; GFNIAVX-LABEL: test_bitreverse_i64:
369+
; GFNIAVX: # %bb.0:
370+
; GFNIAVX-NEXT: vmovq %rdi, %xmm0
371+
; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
372+
; GFNIAVX-NEXT: vmovq %xmm0, %rax
373+
; GFNIAVX-NEXT: bswapq %rax
374+
; GFNIAVX-NEXT: retq
467375
%b = call i64 @llvm.bitreverse.i64(i64 %a)
468376
ret i64 %b
469377
}

0 commit comments

Comments
 (0)