Skip to content

Commit c7bd284

Browse files
shamithoke and shami authored
Support for i8/i16 for bitreverse using GFNI. (#88625)
In continuation of PR #81764, this change extends the GFNI support to i8 and i16. --------- Co-authored-by: shami <[email protected]>
1 parent f2923e3 commit c7bd284

File tree

3 files changed

+38
-111
lines changed

3 files changed

+38
-111
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,8 +1287,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
12871287
}
12881288

12891289
if (Subtarget.hasGFNI()) {
1290-
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
1291-
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
1290+
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
1291+
setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
1292+
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
1293+
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
12921294
}
12931295

12941296
// These might be better off as horizontal vector ops.
@@ -31317,16 +31319,18 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
3131731319
if (VT.is256BitVector() && !Subtarget.hasInt256())
3131831320
return splitVectorIntUnary(Op, DAG, DL);
3131931321

31320-
// Lower i32/i64 as vXi8 BITREVERSE + BSWAP
31322+
// Lower i8/i16/i32/i64 as vXi8 BITREVERSE + BSWAP (BSWAP is skipped for i8).
3132131323
if (!VT.isVector()) {
31322-
assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
31324+
assert(
31325+
(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&
31326+
"Only tested for i8/i16/i32/i64");
3132331327
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
3132431328
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
3132531329
Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
3132631330
DAG.getBitcast(MVT::v16i8, Res));
3132731331
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
3132831332
DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
31329-
return DAG.getNode(ISD::BSWAP, DL, VT, Res);
31333+
return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
3133031334
}
3133131335

3133231336
assert(VT.isVector() && VT.getSizeInBits() >= 128);

llvm/test/CodeGen/X86/bitreverse.ll

Lines changed: 13 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
374374
;
375375
; GFNI-LABEL: test_bitreverse_i16:
376376
; GFNI: # %bb.0:
377-
; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
378-
; GFNI-NEXT: rolw $8, %di
379-
; GFNI-NEXT: movl %edi, %eax
380-
; GFNI-NEXT: andl $3855, %eax # imm = 0xF0F
381-
; GFNI-NEXT: shll $4, %eax
382-
; GFNI-NEXT: shrl $4, %edi
383-
; GFNI-NEXT: andl $3855, %edi # imm = 0xF0F
384-
; GFNI-NEXT: orl %eax, %edi
385-
; GFNI-NEXT: movl %edi, %eax
386-
; GFNI-NEXT: andl $13107, %eax # imm = 0x3333
387-
; GFNI-NEXT: shrl $2, %edi
388-
; GFNI-NEXT: andl $13107, %edi # imm = 0x3333
389-
; GFNI-NEXT: leal (%rdi,%rax,4), %eax
390-
; GFNI-NEXT: movl %eax, %ecx
391-
; GFNI-NEXT: andl $21845, %ecx # imm = 0x5555
392-
; GFNI-NEXT: shrl %eax
393-
; GFNI-NEXT: andl $21845, %eax # imm = 0x5555
394-
; GFNI-NEXT: leal (%rax,%rcx,2), %eax
377+
; GFNI-NEXT: vmovd %edi, %xmm0
378+
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
379+
; GFNI-NEXT: vmovd %xmm0, %eax
380+
; GFNI-NEXT: rolw $8, %ax
395381
; GFNI-NEXT: # kill: def $ax killed $ax killed $eax
396382
; GFNI-NEXT: retq
397383
%b = call i16 @llvm.bitreverse.i16(i16 %a)
@@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) {
446432
;
447433
; GFNI-LABEL: test_bitreverse_i8:
448434
; GFNI: # %bb.0:
449-
; GFNI-NEXT: rolb $4, %dil
450-
; GFNI-NEXT: movl %edi, %eax
451-
; GFNI-NEXT: andb $51, %al
452-
; GFNI-NEXT: shlb $2, %al
453-
; GFNI-NEXT: shrb $2, %dil
454-
; GFNI-NEXT: andb $51, %dil
455-
; GFNI-NEXT: orb %dil, %al
456-
; GFNI-NEXT: movl %eax, %ecx
457-
; GFNI-NEXT: andb $85, %cl
458-
; GFNI-NEXT: addb %cl, %cl
459-
; GFNI-NEXT: shrb %al
460-
; GFNI-NEXT: andb $85, %al
461-
; GFNI-NEXT: orb %cl, %al
435+
; GFNI-NEXT: vmovd %edi, %xmm0
436+
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
437+
; GFNI-NEXT: vmovd %xmm0, %eax
438+
; GFNI-NEXT: # kill: def $al killed $al killed $eax
462439
; GFNI-NEXT: retq
463440
%b = call i8 @llvm.bitreverse.i8(i8 %a)
464441
ret i8 %b
@@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) {
514491
;
515492
; GFNI-LABEL: test_bitreverse_i4:
516493
; GFNI: # %bb.0:
517-
; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
518-
; GFNI-NEXT: movl %edi, %eax
519-
; GFNI-NEXT: andb $8, %al
520-
; GFNI-NEXT: leal (%rdi,%rdi), %ecx
521-
; GFNI-NEXT: andb $4, %cl
522-
; GFNI-NEXT: leal (,%rdi,8), %edx
523-
; GFNI-NEXT: andb $8, %dl
524-
; GFNI-NEXT: orb %cl, %dl
525-
; GFNI-NEXT: shrb %dil
526-
; GFNI-NEXT: andb $2, %dil
527-
; GFNI-NEXT: orb %dil, %dl
528-
; GFNI-NEXT: shrb $3, %al
529-
; GFNI-NEXT: orb %dl, %al
494+
; GFNI-NEXT: vmovd %edi, %xmm0
495+
; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
496+
; GFNI-NEXT: vmovd %xmm0, %eax
497+
; GFNI-NEXT: shrb $4, %al
498+
; GFNI-NEXT: # kill: def $al killed $al killed $eax
530499
; GFNI-NEXT: retq
531500
%b = call i4 @llvm.bitreverse.i4(i4 %a)
532501
ret i4 %b

llvm/test/CodeGen/X86/vector-bitreverse.ll

Lines changed: 16 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
6161
;
6262
; GFNISSE-LABEL: test_bitreverse_i8:
6363
; GFNISSE: # %bb.0:
64-
; GFNISSE-NEXT: rolb $4, %dil
65-
; GFNISSE-NEXT: movl %edi, %eax
66-
; GFNISSE-NEXT: andb $51, %al
67-
; GFNISSE-NEXT: shlb $2, %al
68-
; GFNISSE-NEXT: shrb $2, %dil
69-
; GFNISSE-NEXT: andb $51, %dil
70-
; GFNISSE-NEXT: orb %dil, %al
71-
; GFNISSE-NEXT: movl %eax, %ecx
72-
; GFNISSE-NEXT: andb $85, %cl
73-
; GFNISSE-NEXT: addb %cl, %cl
74-
; GFNISSE-NEXT: shrb %al
75-
; GFNISSE-NEXT: andb $85, %al
76-
; GFNISSE-NEXT: orb %cl, %al
64+
; GFNISSE-NEXT: movd %edi, %xmm0
65+
; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
66+
; GFNISSE-NEXT: movd %xmm0, %eax
67+
; GFNISSE-NEXT: # kill: def $al killed $al killed $eax
7768
; GFNISSE-NEXT: retq
7869
;
7970
; GFNIAVX-LABEL: test_bitreverse_i8:
8071
; GFNIAVX: # %bb.0:
81-
; GFNIAVX-NEXT: rolb $4, %dil
82-
; GFNIAVX-NEXT: movl %edi, %eax
83-
; GFNIAVX-NEXT: andb $51, %al
84-
; GFNIAVX-NEXT: shlb $2, %al
85-
; GFNIAVX-NEXT: shrb $2, %dil
86-
; GFNIAVX-NEXT: andb $51, %dil
87-
; GFNIAVX-NEXT: orb %dil, %al
88-
; GFNIAVX-NEXT: movl %eax, %ecx
89-
; GFNIAVX-NEXT: andb $85, %cl
90-
; GFNIAVX-NEXT: addb %cl, %cl
91-
; GFNIAVX-NEXT: shrb %al
92-
; GFNIAVX-NEXT: andb $85, %al
93-
; GFNIAVX-NEXT: orb %cl, %al
72+
; GFNIAVX-NEXT: vmovd %edi, %xmm0
73+
; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
74+
; GFNIAVX-NEXT: vmovd %xmm0, %eax
75+
; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax
9476
; GFNIAVX-NEXT: retq
9577
%b = call i8 @llvm.bitreverse.i8(i8 %a)
9678
ret i8 %b
@@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
153135
;
154136
; GFNISSE-LABEL: test_bitreverse_i16:
155137
; GFNISSE: # %bb.0:
156-
; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi
157-
; GFNISSE-NEXT: rolw $8, %di
158-
; GFNISSE-NEXT: movl %edi, %eax
159-
; GFNISSE-NEXT: andl $3855, %eax # imm = 0xF0F
160-
; GFNISSE-NEXT: shll $4, %eax
161-
; GFNISSE-NEXT: shrl $4, %edi
162-
; GFNISSE-NEXT: andl $3855, %edi # imm = 0xF0F
163-
; GFNISSE-NEXT: orl %eax, %edi
164-
; GFNISSE-NEXT: movl %edi, %eax
165-
; GFNISSE-NEXT: andl $13107, %eax # imm = 0x3333
166-
; GFNISSE-NEXT: shrl $2, %edi
167-
; GFNISSE-NEXT: andl $13107, %edi # imm = 0x3333
168-
; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax
169-
; GFNISSE-NEXT: movl %eax, %ecx
170-
; GFNISSE-NEXT: andl $21845, %ecx # imm = 0x5555
171-
; GFNISSE-NEXT: shrl %eax
172-
; GFNISSE-NEXT: andl $21845, %eax # imm = 0x5555
173-
; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax
138+
; GFNISSE-NEXT: movd %edi, %xmm0
139+
; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
140+
; GFNISSE-NEXT: movd %xmm0, %eax
141+
; GFNISSE-NEXT: rolw $8, %ax
174142
; GFNISSE-NEXT: # kill: def $ax killed $ax killed $eax
175143
; GFNISSE-NEXT: retq
176144
;
177145
; GFNIAVX-LABEL: test_bitreverse_i16:
178146
; GFNIAVX: # %bb.0:
179-
; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi
180-
; GFNIAVX-NEXT: rolw $8, %di
181-
; GFNIAVX-NEXT: movl %edi, %eax
182-
; GFNIAVX-NEXT: andl $3855, %eax # imm = 0xF0F
183-
; GFNIAVX-NEXT: shll $4, %eax
184-
; GFNIAVX-NEXT: shrl $4, %edi
185-
; GFNIAVX-NEXT: andl $3855, %edi # imm = 0xF0F
186-
; GFNIAVX-NEXT: orl %eax, %edi
187-
; GFNIAVX-NEXT: movl %edi, %eax
188-
; GFNIAVX-NEXT: andl $13107, %eax # imm = 0x3333
189-
; GFNIAVX-NEXT: shrl $2, %edi
190-
; GFNIAVX-NEXT: andl $13107, %edi # imm = 0x3333
191-
; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax
192-
; GFNIAVX-NEXT: movl %eax, %ecx
193-
; GFNIAVX-NEXT: andl $21845, %ecx # imm = 0x5555
194-
; GFNIAVX-NEXT: shrl %eax
195-
; GFNIAVX-NEXT: andl $21845, %eax # imm = 0x5555
196-
; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax
147+
; GFNIAVX-NEXT: vmovd %edi, %xmm0
148+
; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
149+
; GFNIAVX-NEXT: vmovd %xmm0, %eax
150+
; GFNIAVX-NEXT: rolw $8, %ax
197151
; GFNIAVX-NEXT: # kill: def $ax killed $ax killed $eax
198152
; GFNIAVX-NEXT: retq
199153
%b = call i16 @llvm.bitreverse.i16(i16 %a)

0 commit comments

Comments
 (0)