Skip to content

[TableGen] Optimize intrinsic info type signature encoding #106809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
30 changes: 30 additions & 0 deletions llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===- GetIntrinsicInfoTableEntriesBM.cpp - IIT signature benchmark -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "benchmark/benchmark.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;
using namespace Intrinsic;

/// Benchmark body: for every benchmark iteration, decode the IIT descriptor
/// table entries of each intrinsic ID in the range [1, num_intrinsics).
static void BM_GetIntrinsicInfoTableEntries(benchmark::State &state) {
  // A single descriptor buffer is shared by all lookups.
  SmallVector<IITDescriptor> Descriptors;
  for (auto _ : state) {
    for (ID IntrinsicID = 1; IntrinsicID < num_intrinsics; ++IntrinsicID) {
      // Empty the buffer before each lookup so entries do not pile up from one
      // intrinsic to the next; the storage it already holds gets reused, so
      // later passes should not need to allocate again.
      Descriptors.clear();
      getIntrinsicInfoTableEntries(IntrinsicID, Descriptors);
    }
  }
}

// Register the benchmark function with the Google Benchmark framework.
BENCHMARK(BM_GetIntrinsicInfoTableEntries);

// Expand to the benchmark executable's main() entry point.
BENCHMARK_MAIN();
14 changes: 8 additions & 6 deletions llvm/lib/IR/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1381,22 +1381,24 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,

void Intrinsic::getIntrinsicInfoTableEntries(ID id,
SmallVectorImpl<IITDescriptor> &T){
static_assert(sizeof(IIT_Table[0]) == 2,
"Expect 16-bit entries in IIT_Table");
// Check to see if the intrinsic's type was expressible by the table.
unsigned TableVal = IIT_Table[id-1];
uint16_t TableVal = IIT_Table[id - 1];

// Decode the TableVal into an array of IITValues.
SmallVector<unsigned char, 8> IITValues;
SmallVector<unsigned char> IITValues;
ArrayRef<unsigned char> IITEntries;
unsigned NextElt = 0;
if ((TableVal >> 31) != 0) {
if (TableVal >> 15) {
// This is an offset into the IIT_LongEncodingTable.
IITEntries = IIT_LongEncodingTable;

// Strip sentinel bit.
NextElt = (TableVal << 1) >> 1;
NextElt = TableVal & 0x7fff;
} else {
// Decode the TableVal into an array of IITValues. If the entry was encoded
// into a single word in the table itself, decode it now.
// If the entry was encoded into a single word in the table itself, decode
// it from an array of nibbles to an array of bytes.
do {
IITValues.push_back(TableVal & 0xF);
TableVal >>= 4;
Expand Down
80 changes: 48 additions & 32 deletions llvm/utils/TableGen/IntrinsicEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,37 @@ static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
return TypeSig;
}

// Pack the type signature into 32-bit fixed encoding word.
static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) {
if (TypeSig.size() > 8)
return std::nullopt;

uint32_t Result = 0;
for (unsigned char C : reverse(TypeSig)) {
if (C > 15)
return std::nullopt;
Result = (Result << 4) | C;
}
return Result;
}

void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
// If we can compute a 32-bit fixed encoding for this intrinsic, do so and
// Note: the code below can be switched to use 32-bit fixed encoding by
// flipping the flag below.
constexpr bool Use16BitFixedEncoding = true;
using FixedEncodingTy =
std::conditional_t<Use16BitFixedEncoding, uint16_t, uint32_t>;
constexpr unsigned FixedEncodingBits = sizeof(FixedEncodingTy) * CHAR_BIT;
// Mask with all bits 1 except the most significant bit.
const unsigned Mask = (1U << (FixedEncodingBits - 1)) - 1;
const unsigned MSBPostion = FixedEncodingBits - 1;
StringRef FixedEncodingTypeName =
Use16BitFixedEncoding ? "uint16_t" : "uint32_t";

// If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
// capture it in this vector, otherwise store a ~0U.
std::vector<unsigned> FixedEncodings;
std::vector<FixedEncodingTy> FixedEncodings;
SequenceToOffsetTable<TypeSigTy> LongEncodingTable;

FixedEncodings.reserve(Ints.size());
Expand All @@ -296,69 +322,59 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
// Get the signature for the intrinsic.
TypeSigTy TypeSig = ComputeTypeSignature(Int);

// Check to see if we can encode it into a 32-bit word. We can only encode
// 8 nibbles into a 32-bit word.
if (TypeSig.size() <= 8) {
// Attempt to pack elements of TypeSig into a 32-bit word, starting from
// the most significant nibble.
unsigned Result = 0;
bool Failed = false;
for (unsigned char C : reverse(TypeSig)) {
if (C > 15) {
Failed = true;
break;
}
Result = (Result << 4) | C;
}

// If this could be encoded into a 31-bit word, return it.
if (!Failed && (Result >> 31) == 0) {
FixedEncodings.push_back(Result);
continue;
}
// Check to see if we can encode it into a 16/32 bit word.
std::optional<uint32_t> Result = encodePacked(TypeSig);
if (Result && (*Result & Mask) == Result) {
FixedEncodings.push_back(static_cast<FixedEncodingTy>(*Result));
continue;
}

// Otherwise, we're going to unique the sequence into the
// LongEncodingTable, and use its offset in the fixed encoding table instead.
LongEncodingTable.add(TypeSig);

// This is a placeholder that we'll replace after the table is laid out.
FixedEncodings.push_back(~0U);
FixedEncodings.push_back(static_cast<FixedEncodingTy>(~0U));
}

LongEncodingTable.layout();

OS << R"(// Global intrinsic function declaration type table.
OS << formatv(R"(// Global intrinsic function declaration type table.
#ifdef GET_INTRINSIC_GENERATOR_GLOBAL
static constexpr unsigned IIT_Table[] = {
)";
static constexpr {0} IIT_Table[] = {{
)",
FixedEncodingTypeName);

unsigned MaxOffset = 0;
for (auto [Idx, FixedEncoding, Int] : enumerate(FixedEncodings, Ints)) {
if ((Idx & 7) == 7)
OS << "\n ";

// If the entry fit in the table, just emit it.
if (FixedEncoding != ~0U) {
if ((FixedEncoding & Mask) == FixedEncoding) {
OS << "0x" << Twine::utohexstr(FixedEncoding) << ", ";
continue;
}

TypeSigTy TypeSig = ComputeTypeSignature(Int);
unsigned Offset = LongEncodingTable.get(TypeSig);
MaxOffset = std::max(MaxOffset, Offset);

// Otherwise, emit the offset into the long encoding table. We emit it this
// way so that it is easier to read the offset in the .def file.
OS << "(1U<<31) | " << LongEncodingTable.get(TypeSig) << ", ";
OS << formatv("(1U<<{0}) | {1}, ", MSBPostion, Offset);
}

OS << "0\n};\n\n";

// Verify that all offsets will fit in 16/32 bits.
if ((MaxOffset & Mask) != MaxOffset)
PrintFatalError("Offset of long encoding table exceeds encoding bits");

// Emit the shared table of long type signature encodings.
OS << "static constexpr unsigned char IIT_LongEncodingTable[] = {\n";
if (!LongEncodingTable.empty())
LongEncodingTable.emit(
OS, [](raw_ostream &OS, unsigned char C) { OS << (unsigned)C; });
OS << " 255\n};\n\n";

OS << " 255\n};\n";
OS << "#endif\n\n"; // End of GET_INTRINSIC_GENERATOR_GLOBAL
}

Expand Down
Loading