Skip to content

Commit 24ddce6

Browse files
authored
[GISel] Legalize bitreverse with types smaller than 8 bits (#92998)
This patch adds support for lowering `bitreverse` with types smaller than 8 bits. It also fixes an existing assertion failure in `llvm::APInt::getSplat`: https://godbolt.org/z/7crs8xrcG The lowering logic is copied from SDAG: https://github.com/llvm/llvm-project/blob/2034f2fc8729bd4645ef7caa3c5c6efa284d2d3f/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp#L9384-L9398
1 parent 180448b commit 24ddce6

File tree

4 files changed

+828
-22
lines changed

4 files changed

+828
-22
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7977,27 +7977,51 @@ LegalizerHelper::LegalizeResult
79777977
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
79787978
auto [Dst, Src] = MI.getFirst2Regs();
79797979
const LLT Ty = MRI.getType(Src);
7980-
unsigned Size = Ty.getSizeInBits();
7980+
unsigned Size = Ty.getScalarSizeInBits();
7981+
7982+
if (Size >= 8) {
7983+
MachineInstrBuilder BSWAP =
7984+
MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7985+
7986+
// swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7987+
// [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7988+
// -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7989+
MachineInstrBuilder Swap4 =
7990+
SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7991+
7992+
// swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7993+
// [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
7994+
// -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
7995+
MachineInstrBuilder Swap2 =
7996+
SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7997+
7998+
// swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
7999+
// 6|7
8000+
// [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
8001+
// -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
8002+
SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8003+
} else {
8004+
// Expand bitreverse for types smaller than 8 bits.
8005+
MachineInstrBuilder Tmp;
8006+
for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
8007+
MachineInstrBuilder Tmp2;
8008+
if (I < J) {
8009+
auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
8010+
Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
8011+
} else {
8012+
auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
8013+
Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
8014+
}
79818015

7982-
MachineInstrBuilder BSWAP =
7983-
MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7984-
7985-
// swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7986-
// [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7987-
// -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7988-
MachineInstrBuilder Swap4 =
7989-
SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7990-
7991-
// swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7992-
// [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7993-
// -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7994-
MachineInstrBuilder Swap2 =
7995-
SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7996-
7997-
// swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7998-
// [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7999-
// -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
8000-
SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
8016+
auto Mask = MIRBuilder.buildConstant(Ty, 1U << J);
8017+
Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
8018+
if (I == 0)
8019+
Tmp = Tmp2;
8020+
else
8021+
Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
8022+
}
8023+
MIRBuilder.buildCopy(Dst, Tmp);
8024+
}
80018025

80028026
MI.eraseFromParent();
80038027
return Legalized;
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV32
3+
; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV64
4+
5+
define i2 @bitreverse_i2(i2 %x) {
6+
; RV32-LABEL: bitreverse_i2:
7+
; RV32: # %bb.0:
8+
; RV32-NEXT: slli a1, a0, 1
9+
; RV32-NEXT: andi a1, a1, 2
10+
; RV32-NEXT: andi a0, a0, 3
11+
; RV32-NEXT: srli a0, a0, 1
12+
; RV32-NEXT: or a0, a1, a0
13+
; RV32-NEXT: ret
14+
;
15+
; RV64-LABEL: bitreverse_i2:
16+
; RV64: # %bb.0:
17+
; RV64-NEXT: slli a1, a0, 1
18+
; RV64-NEXT: andi a1, a1, 2
19+
; RV64-NEXT: andi a0, a0, 3
20+
; RV64-NEXT: srliw a0, a0, 1
21+
; RV64-NEXT: or a0, a1, a0
22+
; RV64-NEXT: ret
23+
%rev = call i2 @llvm.bitreverse.i2(i2 %x)
24+
ret i2 %rev
25+
}
26+
27+
define i3 @bitreverse_i3(i3 %x) {
28+
; RV32-LABEL: bitreverse_i3:
29+
; RV32: # %bb.0:
30+
; RV32-NEXT: slli a1, a0, 2
31+
; RV32-NEXT: andi a1, a1, 4
32+
; RV32-NEXT: andi a0, a0, 7
33+
; RV32-NEXT: andi a2, a0, 2
34+
; RV32-NEXT: or a1, a1, a2
35+
; RV32-NEXT: srli a0, a0, 2
36+
; RV32-NEXT: or a0, a1, a0
37+
; RV32-NEXT: ret
38+
;
39+
; RV64-LABEL: bitreverse_i3:
40+
; RV64: # %bb.0:
41+
; RV64-NEXT: slli a1, a0, 2
42+
; RV64-NEXT: andi a1, a1, 4
43+
; RV64-NEXT: andi a0, a0, 7
44+
; RV64-NEXT: andi a2, a0, 2
45+
; RV64-NEXT: or a1, a1, a2
46+
; RV64-NEXT: srliw a0, a0, 2
47+
; RV64-NEXT: or a0, a1, a0
48+
; RV64-NEXT: ret
49+
%rev = call i3 @llvm.bitreverse.i3(i3 %x)
50+
ret i3 %rev
51+
}
52+
53+
define i4 @bitreverse_i4(i4 %x) {
54+
; RV32-LABEL: bitreverse_i4:
55+
; RV32: # %bb.0:
56+
; RV32-NEXT: slli a1, a0, 3
57+
; RV32-NEXT: andi a1, a1, 8
58+
; RV32-NEXT: slli a2, a0, 1
59+
; RV32-NEXT: andi a2, a2, 4
60+
; RV32-NEXT: or a1, a1, a2
61+
; RV32-NEXT: andi a0, a0, 15
62+
; RV32-NEXT: srli a2, a0, 1
63+
; RV32-NEXT: andi a2, a2, 2
64+
; RV32-NEXT: or a1, a1, a2
65+
; RV32-NEXT: srli a0, a0, 3
66+
; RV32-NEXT: or a0, a1, a0
67+
; RV32-NEXT: ret
68+
;
69+
; RV64-LABEL: bitreverse_i4:
70+
; RV64: # %bb.0:
71+
; RV64-NEXT: slli a1, a0, 3
72+
; RV64-NEXT: andi a1, a1, 8
73+
; RV64-NEXT: slli a2, a0, 1
74+
; RV64-NEXT: andi a2, a2, 4
75+
; RV64-NEXT: or a1, a1, a2
76+
; RV64-NEXT: andi a0, a0, 15
77+
; RV64-NEXT: srliw a2, a0, 1
78+
; RV64-NEXT: andi a2, a2, 2
79+
; RV64-NEXT: or a1, a1, a2
80+
; RV64-NEXT: srliw a0, a0, 3
81+
; RV64-NEXT: or a0, a1, a0
82+
; RV64-NEXT: ret
83+
%rev = call i4 @llvm.bitreverse.i4(i4 %x)
84+
ret i4 %rev
85+
}
86+
87+
define i7 @bitreverse_i7(i7 %x) {
88+
; RV32-LABEL: bitreverse_i7:
89+
; RV32: # %bb.0:
90+
; RV32-NEXT: slli a1, a0, 6
91+
; RV32-NEXT: andi a1, a1, 64
92+
; RV32-NEXT: slli a2, a0, 4
93+
; RV32-NEXT: andi a2, a2, 32
94+
; RV32-NEXT: or a1, a1, a2
95+
; RV32-NEXT: slli a2, a0, 2
96+
; RV32-NEXT: andi a2, a2, 16
97+
; RV32-NEXT: andi a0, a0, 127
98+
; RV32-NEXT: andi a3, a0, 8
99+
; RV32-NEXT: or a2, a2, a3
100+
; RV32-NEXT: or a1, a1, a2
101+
; RV32-NEXT: srli a2, a0, 2
102+
; RV32-NEXT: andi a2, a2, 4
103+
; RV32-NEXT: srli a3, a0, 4
104+
; RV32-NEXT: andi a3, a3, 2
105+
; RV32-NEXT: or a2, a2, a3
106+
; RV32-NEXT: or a1, a1, a2
107+
; RV32-NEXT: srli a0, a0, 6
108+
; RV32-NEXT: or a0, a1, a0
109+
; RV32-NEXT: ret
110+
;
111+
; RV64-LABEL: bitreverse_i7:
112+
; RV64: # %bb.0:
113+
; RV64-NEXT: slli a1, a0, 6
114+
; RV64-NEXT: andi a1, a1, 64
115+
; RV64-NEXT: slli a2, a0, 4
116+
; RV64-NEXT: andi a2, a2, 32
117+
; RV64-NEXT: or a1, a1, a2
118+
; RV64-NEXT: slli a2, a0, 2
119+
; RV64-NEXT: andi a2, a2, 16
120+
; RV64-NEXT: andi a0, a0, 127
121+
; RV64-NEXT: andi a3, a0, 8
122+
; RV64-NEXT: or a2, a2, a3
123+
; RV64-NEXT: or a1, a1, a2
124+
; RV64-NEXT: srliw a2, a0, 2
125+
; RV64-NEXT: andi a2, a2, 4
126+
; RV64-NEXT: srliw a3, a0, 4
127+
; RV64-NEXT: andi a3, a3, 2
128+
; RV64-NEXT: or a2, a2, a3
129+
; RV64-NEXT: or a1, a1, a2
130+
; RV64-NEXT: srliw a0, a0, 6
131+
; RV64-NEXT: or a0, a1, a0
132+
; RV64-NEXT: ret
133+
%rev = call i7 @llvm.bitreverse.i7(i7 %x)
134+
ret i7 %rev
135+
}
136+
137+
define i24 @bitreverse_i24(i24 %x) {
138+
; RV32-LABEL: bitreverse_i24:
139+
; RV32: # %bb.0:
140+
; RV32-NEXT: slli a1, a0, 16
141+
; RV32-NEXT: lui a2, 4096
142+
; RV32-NEXT: addi a2, a2, -1
143+
; RV32-NEXT: and a0, a0, a2
144+
; RV32-NEXT: srli a0, a0, 16
145+
; RV32-NEXT: or a0, a0, a1
146+
; RV32-NEXT: lui a1, 1048335
147+
; RV32-NEXT: addi a1, a1, 240
148+
; RV32-NEXT: and a3, a1, a2
149+
; RV32-NEXT: and a3, a0, a3
150+
; RV32-NEXT: srli a3, a3, 4
151+
; RV32-NEXT: slli a0, a0, 4
152+
; RV32-NEXT: and a0, a0, a1
153+
; RV32-NEXT: or a0, a3, a0
154+
; RV32-NEXT: lui a1, 1047757
155+
; RV32-NEXT: addi a1, a1, -820
156+
; RV32-NEXT: and a3, a1, a2
157+
; RV32-NEXT: and a3, a0, a3
158+
; RV32-NEXT: srli a3, a3, 2
159+
; RV32-NEXT: slli a0, a0, 2
160+
; RV32-NEXT: and a0, a0, a1
161+
; RV32-NEXT: or a0, a3, a0
162+
; RV32-NEXT: lui a1, 1047211
163+
; RV32-NEXT: addi a1, a1, -1366
164+
; RV32-NEXT: and a2, a1, a2
165+
; RV32-NEXT: and a2, a0, a2
166+
; RV32-NEXT: srli a2, a2, 1
167+
; RV32-NEXT: slli a0, a0, 1
168+
; RV32-NEXT: and a0, a0, a1
169+
; RV32-NEXT: or a0, a2, a0
170+
; RV32-NEXT: ret
171+
;
172+
; RV64-LABEL: bitreverse_i24:
173+
; RV64: # %bb.0:
174+
; RV64-NEXT: slli a1, a0, 16
175+
; RV64-NEXT: lui a2, 4096
176+
; RV64-NEXT: addi a2, a2, -1
177+
; RV64-NEXT: and a0, a0, a2
178+
; RV64-NEXT: srliw a0, a0, 16
179+
; RV64-NEXT: or a0, a0, a1
180+
; RV64-NEXT: lui a1, 1048335
181+
; RV64-NEXT: addi a1, a1, 240
182+
; RV64-NEXT: and a3, a1, a2
183+
; RV64-NEXT: and a3, a0, a3
184+
; RV64-NEXT: srliw a3, a3, 4
185+
; RV64-NEXT: slli a0, a0, 4
186+
; RV64-NEXT: and a0, a0, a1
187+
; RV64-NEXT: or a0, a3, a0
188+
; RV64-NEXT: lui a1, 1047757
189+
; RV64-NEXT: addi a1, a1, -820
190+
; RV64-NEXT: and a3, a1, a2
191+
; RV64-NEXT: and a3, a0, a3
192+
; RV64-NEXT: srliw a3, a3, 2
193+
; RV64-NEXT: slli a0, a0, 2
194+
; RV64-NEXT: and a0, a0, a1
195+
; RV64-NEXT: or a0, a3, a0
196+
; RV64-NEXT: lui a1, 1047211
197+
; RV64-NEXT: addiw a1, a1, -1366
198+
; RV64-NEXT: and a2, a1, a2
199+
; RV64-NEXT: and a2, a0, a2
200+
; RV64-NEXT: srliw a2, a2, 1
201+
; RV64-NEXT: slliw a0, a0, 1
202+
; RV64-NEXT: and a0, a0, a1
203+
; RV64-NEXT: or a0, a2, a0
204+
; RV64-NEXT: ret
205+
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
206+
ret i24 %rev
207+
}

0 commit comments

Comments
 (0)