Skip to content

[GISel] Legalize bitreverse with types smaller than 8 bits #92998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 44 additions & 20 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7975,27 +7975,51 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned Size = Ty.getSizeInBits();
unsigned Size = Ty.getScalarSizeInBits();

if (Size >= 8) {
MachineInstrBuilder BSWAP =
MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

// swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
// [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
// -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
MachineInstrBuilder Swap4 =
SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

// swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
// [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
// -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
MachineInstrBuilder Swap2 =
SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

// swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
// 6|7
// [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
// -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
} else {
// Expand bitreverse for types smaller than 8 bits.
MachineInstrBuilder Tmp;
for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
MachineInstrBuilder Tmp2;
if (I < J) {
auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
} else {
auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
}

MachineInstrBuilder BSWAP =
MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

// swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
// [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
// -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
MachineInstrBuilder Swap4 =
SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

// swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
// [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
// -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
MachineInstrBuilder Swap2 =
SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

// swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
// [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
// -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
auto Mask = MIRBuilder.buildConstant(Ty, 1U << J);
Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
if (I == 0)
Tmp = Tmp2;
else
Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
}
MIRBuilder.buildCopy(Dst, Tmp);
}

MI.eraseFromParent();
return Legalized;
Expand Down
207 changes: 207 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV64

; Bit-reverses an i2 value through the llvm.bitreverse.i2 intrinsic and
; returns the result. The autogenerated check lines pin the riscv32 and
; riscv64 GlobalISel assembly produced by this file's llc RUN lines.
; NOTE(review): presumably exercises the sub-8-bit expansion added to
; LegalizerHelper::lowerBitreverse - confirm against the legalizer rules.
define i2 @bitreverse_i2(i2 %x) {
; RV32-LABEL: bitreverse_i2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 1
; RV32-NEXT: andi a1, a1, 2
; RV32-NEXT: andi a0, a0, 3
; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_i2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 1
; RV64-NEXT: andi a1, a1, 2
; RV64-NEXT: andi a0, a0, 3
; RV64-NEXT: srliw a0, a0, 1
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i2 @llvm.bitreverse.i2(i2 %x)
ret i2 %rev
}

; Bit-reverses an i3 value (an odd bit width, so the middle bit keeps its
; position) through the llvm.bitreverse.i3 intrinsic and returns the result.
; The autogenerated check lines pin the riscv32 and riscv64 GlobalISel
; assembly produced by this file's llc RUN lines.
define i3 @bitreverse_i3(i3 %x) {
; RV32-LABEL: bitreverse_i3:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 2
; RV32-NEXT: andi a1, a1, 4
; RV32-NEXT: andi a0, a0, 7
; RV32-NEXT: andi a2, a0, 2
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a0, a0, 2
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_i3:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 2
; RV64-NEXT: andi a1, a1, 4
; RV64-NEXT: andi a0, a0, 7
; RV64-NEXT: andi a2, a0, 2
; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srliw a0, a0, 2
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i3 @llvm.bitreverse.i3(i3 %x)
ret i3 %rev
}

; Bit-reverses an i4 value through the llvm.bitreverse.i4 intrinsic and
; returns the result. The autogenerated check lines pin the riscv32 and
; riscv64 GlobalISel assembly produced by this file's llc RUN lines.
define i4 @bitreverse_i4(i4 %x) {
; RV32-LABEL: bitreverse_i4:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 3
; RV32-NEXT: andi a1, a1, 8
; RV32-NEXT: slli a2, a0, 1
; RV32-NEXT: andi a2, a2, 4
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: andi a0, a0, 15
; RV32-NEXT: srli a2, a0, 1
; RV32-NEXT: andi a2, a2, 2
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_i4:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 3
; RV64-NEXT: andi a1, a1, 8
; RV64-NEXT: slli a2, a0, 1
; RV64-NEXT: andi a2, a2, 4
; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: andi a0, a0, 15
; RV64-NEXT: srliw a2, a0, 1
; RV64-NEXT: andi a2, a2, 2
; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srliw a0, a0, 3
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i4 @llvm.bitreverse.i4(i4 %x)
ret i4 %rev
}

; Bit-reverses an i7 value (the widest type below 8 bits covered here)
; through the llvm.bitreverse.i7 intrinsic and returns the result. The
; autogenerated check lines pin the riscv32 and riscv64 GlobalISel assembly
; produced by this file's llc RUN lines.
define i7 @bitreverse_i7(i7 %x) {
; RV32-LABEL: bitreverse_i7:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 6
; RV32-NEXT: andi a1, a1, 64
; RV32-NEXT: slli a2, a0, 4
; RV32-NEXT: andi a2, a2, 32
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: slli a2, a0, 2
; RV32-NEXT: andi a2, a2, 16
; RV32-NEXT: andi a0, a0, 127
; RV32-NEXT: andi a3, a0, 8
; RV32-NEXT: or a2, a2, a3
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a2, a0, 2
; RV32-NEXT: andi a2, a2, 4
; RV32-NEXT: srli a3, a0, 4
; RV32-NEXT: andi a3, a3, 2
; RV32-NEXT: or a2, a2, a3
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: srli a0, a0, 6
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_i7:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 6
; RV64-NEXT: andi a1, a1, 64
; RV64-NEXT: slli a2, a0, 4
; RV64-NEXT: andi a2, a2, 32
; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: slli a2, a0, 2
; RV64-NEXT: andi a2, a2, 16
; RV64-NEXT: andi a0, a0, 127
; RV64-NEXT: andi a3, a0, 8
; RV64-NEXT: or a2, a2, a3
; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srliw a2, a0, 2
; RV64-NEXT: andi a2, a2, 4
; RV64-NEXT: srliw a3, a0, 4
; RV64-NEXT: andi a3, a3, 2
; RV64-NEXT: or a2, a2, a3
; RV64-NEXT: or a1, a1, a2
; RV64-NEXT: srliw a0, a0, 6
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%rev = call i7 @llvm.bitreverse.i7(i7 %x)
ret i7 %rev
}

; Bit-reverses an i24 value (a non-power-of-two width of at least 8 bits)
; through the llvm.bitreverse.i24 intrinsic and returns the result. The
; autogenerated check lines pin the riscv32 and riscv64 GlobalISel assembly
; produced by this file's llc RUN lines.
; NOTE(review): presumably covers the byte-swap-based lowering kept for
; sizes >= 8 in LegalizerHelper::lowerBitreverse - confirm.
define i24 @bitreverse_i24(i24 %x) {
; RV32-LABEL: bitreverse_i24:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 16
; RV32-NEXT: lui a2, 4096
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: lui a1, 1048335
; RV32-NEXT: addi a1, a1, 240
; RV32-NEXT: and a3, a1, a2
; RV32-NEXT: and a3, a0, a3
; RV32-NEXT: srli a3, a3, 4
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: or a0, a3, a0
; RV32-NEXT: lui a1, 1047757
; RV32-NEXT: addi a1, a1, -820
; RV32-NEXT: and a3, a1, a2
; RV32-NEXT: and a3, a0, a3
; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: or a0, a3, a0
; RV32-NEXT: lui a1, 1047211
; RV32-NEXT: addi a1, a1, -1366
; RV32-NEXT: and a2, a1, a2
; RV32-NEXT: and a2, a0, a2
; RV32-NEXT: srli a2, a2, 1
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: or a0, a2, a0
; RV32-NEXT: ret
;
; RV64-LABEL: bitreverse_i24:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 16
; RV64-NEXT: lui a2, 4096
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: srliw a0, a0, 16
; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: lui a1, 1048335
; RV64-NEXT: addi a1, a1, 240
; RV64-NEXT: and a3, a1, a2
; RV64-NEXT: and a3, a0, a3
; RV64-NEXT: srliw a3, a3, 4
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: or a0, a3, a0
; RV64-NEXT: lui a1, 1047757
; RV64-NEXT: addi a1, a1, -820
; RV64-NEXT: and a3, a1, a2
; RV64-NEXT: and a3, a0, a3
; RV64-NEXT: srliw a3, a3, 2
; RV64-NEXT: slli a0, a0, 2
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: or a0, a3, a0
; RV64-NEXT: lui a1, 1047211
; RV64-NEXT: addiw a1, a1, -1366
; RV64-NEXT: and a2, a1, a2
; RV64-NEXT: and a2, a0, a2
; RV64-NEXT: srliw a2, a2, 1
; RV64-NEXT: slliw a0, a0, 1
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: or a0, a2, a0
; RV64-NEXT: ret
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
ret i24 %rev
}
Loading
Loading