-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[TableGen] Optimize intrinsic info type signature encoding #106809
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
3c97e88
to
5ccc29f
Compare
Actual benchmark runs:
Speedup = 511877 -> 474293 = 7.3% |
5ccc29f
to
a974951
Compare
@llvm/pr-subscribers-llvm-ir Author: Rahul Joshi (jurahul) Changes: Change the "fixed encoding" table used for encoding intrinsic type signature to use 16-bit encoding as opposed to 32-bit. Currently measured data is as follows:
For time, with the attached benchmark, we see a 7.3% speedup in the GetIntrinsicInfoTableEntries benchmark. Full diff: https://github.com/llvm/llvm-project/pull/106809.diff 4 Files Affected:
diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt
index e3366e6f3ffe19..aa0cb777733441 100644
--- a/llvm/benchmarks/CMakeLists.txt
+++ b/llvm/benchmarks/CMakeLists.txt
@@ -6,3 +6,4 @@ add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
+add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
diff --git a/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
new file mode 100644
index 00000000000000..2854bacc5ab094
--- /dev/null
+++ b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp
@@ -0,0 +1,31 @@
+//===- GetIntrinsicInfoTableEntries.cpp - IIT signature benchmark ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Intrinsics.h"
+#include <variant>
+
+using namespace llvm;
+using namespace Intrinsic;
+
+static void BM_GetIntrinsicInfoTableEntries(benchmark::State &state) {
+ SmallVector<IITDescriptor> Table;
+ for (auto _ : state) {
+ for (ID ID = 1; ID < num_intrinsics; ++ID) {
+ // This makes sure the vector does not keep growing, as well as after the
+ // first iteration does not result in additional allocations.
+ Table.clear();
+ getIntrinsicInfoTableEntries(ID, Table);
+ }
+ }
+}
+
+BENCHMARK(BM_GetIntrinsicInfoTableEntries);
+
+BENCHMARK_MAIN();
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 69520fdb03dc7c..afef8930669e84 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1381,22 +1381,24 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
void Intrinsic::getIntrinsicInfoTableEntries(ID id,
SmallVectorImpl<IITDescriptor> &T){
+ static_assert(sizeof(IIT_Table[0]) == 2,
+ "Expect 16-bit entries in IIT_Table");
// Check to see if the intrinsic's type was expressible by the table.
- unsigned TableVal = IIT_Table[id-1];
+ uint16_t TableVal = IIT_Table[id - 1];
// Decode the TableVal into an array of IITValues.
- SmallVector<unsigned char, 8> IITValues;
+ SmallVector<unsigned char> IITValues;
ArrayRef<unsigned char> IITEntries;
unsigned NextElt = 0;
- if ((TableVal >> 31) != 0) {
+ if (TableVal >> 15) {
// This is an offset into the IIT_LongEncodingTable.
IITEntries = IIT_LongEncodingTable;
// Strip sentinel bit.
- NextElt = (TableVal << 1) >> 1;
+ NextElt = TableVal & 0x7fff;
} else {
- // Decode the TableVal into an array of IITValues. If the entry was encoded
- // into a single word in the table itself, decode it now.
+ // If the entry was encoded into a single word in the table itself, decode
+ // it from an array of nibbles to an array of bytes.
do {
IITValues.push_back(TableVal & 0xF);
TableVal >>= 4;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 09eb1ed5e1863b..3dd4767e965364 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -61,6 +61,7 @@ class IntrinsicEmitter {
void EmitIntrinsicToOverloadTable(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS);
void EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
+
void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints,
bool IsClang, raw_ostream &OS);
@@ -282,11 +283,34 @@ static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
return TypeSig;
}
+// Pack the type signature into 32-bit fixed encoding word.
+std::optional<unsigned> encodePacked(const TypeSigTy &TypeSig) {
+ if (TypeSig.size() > 8)
+ return std::nullopt;
+
+ unsigned Result = 0;
+ for (unsigned char C : reverse(TypeSig)) {
+ if (C > 15)
+ return std::nullopt;
+ Result = (Result << 4) | C;
+ }
+ return Result;
+}
+
void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
- // If we can compute a 32-bit fixed encoding for this intrinsic, do so and
+ // Note: the code below can be switched to use 32-bit fixed encoding by
+ // flipping the flag below.
+ constexpr bool Use16BitFixedEncoding = true;
+ using EncodingTy =
+ std::conditional_t<Use16BitFixedEncoding, uint16_t, unsigned>;
+ const unsigned Mask = Use16BitFixedEncoding ? 0x7FFF : 0x7FFFFFFF;
+ const unsigned MSBPostion = Use16BitFixedEncoding ? 15 : 31;
+ StringRef TypeName = Use16BitFixedEncoding ? "uint16_t" : "unsigned";
+
+ // If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and
// capture it in this vector, otherwise store a ~0U.
- std::vector<unsigned> FixedEncodings;
+ std::vector<EncodingTy> FixedEncodings;
SequenceToOffsetTable<TypeSigTy> LongEncodingTable;
FixedEncodings.reserve(Ints.size());
@@ -296,69 +320,59 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
// Get the signature for the intrinsic.
TypeSigTy TypeSig = ComputeTypeSignature(Int);
- // Check to see if we can encode it into a 32-bit word. We can only encode
- // 8 nibbles into a 32-bit word.
- if (TypeSig.size() <= 8) {
- // Attempt to pack elements of TypeSig into a 32-bit word, starting from
- // the most significant nibble.
- unsigned Result = 0;
- bool Failed = false;
- for (unsigned char C : reverse(TypeSig)) {
- if (C > 15) {
- Failed = true;
- break;
- }
- Result = (Result << 4) | C;
- }
-
- // If this could be encoded into a 31-bit word, return it.
- if (!Failed && (Result >> 31) == 0) {
- FixedEncodings.push_back(Result);
- continue;
- }
+ // Check to see if we can encode it into a 16/32 bit word.
+ std::optional<unsigned> Result = encodePacked(TypeSig);
+ if (Result && (*Result & Mask) == Result) {
+ FixedEncodings.push_back(static_cast<EncodingTy>(*Result));
+ continue;
}
- // Otherwise, we're going to unique the sequence into the
- // LongEncodingTable, and use its offset in the 32-bit table instead.
LongEncodingTable.add(TypeSig);
// This is a placehold that we'll replace after the table is laid out.
- FixedEncodings.push_back(~0U);
+ FixedEncodings.push_back(static_cast<EncodingTy>(~0U));
}
LongEncodingTable.layout();
- OS << R"(// Global intrinsic function declaration type table.
+ OS << formatv(R"(// Global intrinsic function declaration type table.
#ifdef GET_INTRINSIC_GENERATOR_GLOBAL
-static constexpr unsigned IIT_Table[] = {
- )";
+static constexpr {0} IIT_Table[] = {{
+ )",
+ TypeName);
+ unsigned MaxOffset = 0;
for (auto [Idx, FixedEncoding, Int] : enumerate(FixedEncodings, Ints)) {
if ((Idx & 7) == 7)
OS << "\n ";
// If the entry fit in the table, just emit it.
- if (FixedEncoding != ~0U) {
+ if ((FixedEncoding & Mask) == FixedEncoding) {
OS << "0x" << Twine::utohexstr(FixedEncoding) << ", ";
continue;
}
TypeSigTy TypeSig = ComputeTypeSignature(Int);
+ unsigned Offset = LongEncodingTable.get(TypeSig);
+ MaxOffset = std::max(MaxOffset, Offset);
// Otherwise, emit the offset into the long encoding table. We emit it this
// way so that it is easier to read the offset in the .def file.
- OS << "(1U<<31) | " << LongEncodingTable.get(TypeSig) << ", ";
+ OS << formatv("(1U<<{0}) | {1}, ", MSBPostion, Offset);
}
OS << "0\n};\n\n";
+ // verify that all offsets will fit in 16/32 bits.
+ if ((MaxOffset & Mask) != MaxOffset)
+ PrintFatalError("Offset of long encoding table exceeds encoding bits");
+
// Emit the shared table of register lists.
OS << "static constexpr unsigned char IIT_LongEncodingTable[] = {\n";
if (!LongEncodingTable.empty())
LongEncodingTable.emit(
OS, [](raw_ostream &OS, unsigned char C) { OS << (unsigned)C; });
- OS << " 255\n};\n\n";
-
+ OS << " 255\n};\n";
OS << "#endif\n\n"; // End of GET_INTRINSIC_GENERATOR_GLOBAL
}
|
a974951
to
5d3b486
Compare
5d3b486
to
0d0abc2
Compare
0d0abc2
to
36f1760
Compare
Add intrinsic emitter option to change the "fixed encoding" table used for encoding intrinsic type signature to use 16-bit encoding as opposed to 32-bit. To better segregate LLVM Core from this encoding detail, add a function `decodeIITFixedEncoding` to the emitted code to decode the fixed encoding. This allows TableGen intrinsic emitter to choose 16 or 32-bit fixed encoding without changing the LLVM code. When using 16-bit encoding, we seem to reduce the total static storage size of this info by about a third. Currently measured data is as follows - Current size = 14193*4 + 16058 + 3 = 72833 bytes. - New size = 14193*2 + 19879 + 3 = 48268 bytes. - Reduction = 33.7%
36f1760
to
801af60
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/2271 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/92/builds/5816 Here is the relevant piece of the build log for the reference
|
Change the "fixed encoding" table used for encoding intrinsic
type signature to use 16-bit encoding as opposed to 32-bit.
This results in both space and time improvements. For space,
the total static storage size (in bytes) of this info reduces by 50%:
For time, with the added benchmark, we see a 7.3% speedup in
GetIntrinsicInfoTableEntries
benchmark. Actual output of the benchmark is included in the GitHub PR.