Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit e153fb3

Browse files
committed
ProfileData: Add support for the indexed instrprof format
This adds support for an indexed instrumentation based profiling format, which is just a small header and an on disk hash table. This format will be used by clang's -fprofile-instr-use= for PGO. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206656 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 4c464de commit e153fb3

File tree

10 files changed

+377
-35
lines changed

10 files changed

+377
-35
lines changed

include/llvm/ProfileData/InstrProf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ struct instrprof_error {
2929
bad_magic,
3030
bad_header,
3131
unsupported_version,
32+
unsupported_hash_type,
3233
too_large,
3334
truncated,
3435
malformed,

include/llvm/ProfileData/InstrProfReader.h

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
#define LLVM_PROFILEDATA_INSTRPROF_READER_H_
1717

1818
#include "llvm/ADT/ArrayRef.h"
19+
#include "llvm/ADT/StringExtras.h"
1920
#include "llvm/ProfileData/InstrProf.h"
2021
#include "llvm/Support/LineIterator.h"
2122
#include "llvm/Support/MemoryBuffer.h"
22-
#include "llvm/Support/Endian.h"
23+
#include "llvm/Support/EndianStream.h"
24+
#include "llvm/Support/OnDiskHashTable.h"
2325

2426
#include <iterator>
2527

@@ -29,6 +31,9 @@ class InstrProfReader;
2931

3032
/// Profiling information for a single function.
3133
struct InstrProfRecord {
34+
InstrProfRecord() {}
35+
InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
36+
: Name(Name), Hash(Hash), Counts(Counts) {}
3237
StringRef Name;
3338
uint64_t Hash;
3439
ArrayRef<uint64_t> Counts;
@@ -191,6 +196,106 @@ class RawInstrProfReader : public InstrProfReader {
191196
typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
192197
typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
193198

199+
namespace IndexedInstrProf {
200+
enum class HashT : uint32_t;
201+
uint64_t ComputeHash(HashT Type, StringRef K);
202+
}
203+
204+
/// Trait for lookups into the on-disk hash table for the binary instrprof
205+
/// format.
206+
class InstrProfLookupTrait {
207+
std::vector<uint64_t> CountBuffer;
208+
IndexedInstrProf::HashT HashType;
209+
public:
210+
InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
211+
212+
typedef InstrProfRecord data_type;
213+
typedef StringRef internal_key_type;
214+
typedef StringRef external_key_type;
215+
typedef uint64_t hash_value_type;
216+
typedef uint64_t offset_type;
217+
218+
static bool EqualKey(StringRef A, StringRef B) { return A == B; }
219+
static StringRef GetInternalKey(StringRef K) { return K; }
220+
221+
hash_value_type ComputeHash(StringRef K) {
222+
return IndexedInstrProf::ComputeHash(HashType, K);
223+
}
224+
225+
static std::pair<offset_type, offset_type>
226+
ReadKeyDataLength(const unsigned char *&D) {
227+
using namespace support;
228+
return std::make_pair(endian::readNext<offset_type, little, unaligned>(D),
229+
endian::readNext<offset_type, little, unaligned>(D));
230+
}
231+
232+
StringRef ReadKey(const unsigned char *D, unsigned N) {
233+
return StringRef((const char *)D, N);
234+
}
235+
236+
InstrProfRecord ReadData(StringRef K, const unsigned char *D, unsigned N) {
237+
if (N < 2 * sizeof(uint64_t) || N % sizeof(uint64_t)) {
238+
// The data is corrupt, don't try to read it.
239+
CountBuffer.clear();
240+
return InstrProfRecord("", 0, CountBuffer);
241+
}
242+
243+
using namespace support;
244+
245+
// The first stored value is the hash.
246+
uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
247+
// Each counter follows.
248+
unsigned NumCounters = N / sizeof(uint64_t) - 1;
249+
CountBuffer.clear();
250+
CountBuffer.reserve(NumCounters - 1);
251+
for (unsigned I = 0; I < NumCounters; ++I)
252+
CountBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
253+
254+
return InstrProfRecord(K, Hash, CountBuffer);
255+
}
256+
};
257+
typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
258+
InstrProfReaderIndex;
259+
260+
/// Reader for the indexed binary instrprof format.
261+
class IndexedInstrProfReader : public InstrProfReader {
262+
private:
263+
/// The profile data file contents.
264+
std::unique_ptr<MemoryBuffer> DataBuffer;
265+
/// The index into the profile data.
266+
std::unique_ptr<InstrProfReaderIndex> Index;
267+
/// Iterator over the profile data.
268+
InstrProfReaderIndex::data_iterator RecordIterator;
269+
/// The maximal execution count among all fucntions.
270+
uint64_t MaxFunctionCount;
271+
272+
IndexedInstrProfReader(const IndexedInstrProfReader &) LLVM_DELETED_FUNCTION;
273+
IndexedInstrProfReader &operator=(const IndexedInstrProfReader &)
274+
LLVM_DELETED_FUNCTION;
275+
public:
276+
IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
277+
: DataBuffer(std::move(DataBuffer)), Index(nullptr),
278+
RecordIterator(InstrProfReaderIndex::data_iterator()) {}
279+
280+
/// Return true if the given buffer is in an indexed instrprof format.
281+
static bool hasFormat(const MemoryBuffer &DataBuffer);
282+
283+
/// Read the file header.
284+
error_code readHeader() override;
285+
/// Read a single record.
286+
error_code readNextRecord(InstrProfRecord &Record) override;
287+
288+
/// Fill Counts with the profile data for the given function name.
289+
error_code getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
290+
std::vector<uint64_t> &Counts);
291+
/// Return the maximum of all known function counts.
292+
uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
293+
294+
/// Factory method to create an indexed reader.
295+
static error_code create(std::string Path,
296+
std::unique_ptr<IndexedInstrProfReader> &Result);
297+
};
298+
194299
} // end namespace llvm
195300

196301
#endif // LLVM_PROFILEDATA_INSTRPROF_READER_H_

include/llvm/ProfileData/InstrProfWriter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class InstrProfWriter {
4141
error_code addFunctionCounts(StringRef FunctionName, uint64_t FunctionHash,
4242
ArrayRef<uint64_t> Counters);
4343
/// Ensure that all data is written to disk.
44-
void write(raw_ostream &OS);
44+
void write(raw_fd_ostream &OS);
4545
};
4646

4747
} // end namespace llvm

lib/ProfileData/InstrProf.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category {
3333
return "Invalid header";
3434
case instrprof_error::unsupported_version:
3535
return "Unsupported format version";
36+
case instrprof_error::unsupported_hash_type:
37+
return "Unsupported hash function";
3638
case instrprof_error::too_large:
3739
return "Too much profile data";
3840
case instrprof_error::truncated:

lib/ProfileData/InstrProfIndexed.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// Shared header for the instrumented profile data reader and writer.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
15+
#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
16+
17+
#include "llvm/Support/MD5.h"
18+
19+
namespace llvm {
20+
21+
namespace IndexedInstrProf {
22+
enum class HashT : uint32_t {
23+
MD5,
24+
25+
Last = MD5
26+
};
27+
28+
static inline uint64_t MD5Hash(StringRef Str) {
29+
MD5 Hash;
30+
Hash.update(Str);
31+
llvm::MD5::MD5Result Result;
32+
Hash.final(Result);
33+
// Return the least significant 8 bytes. Our MD5 implementation returns the
34+
// result in little endian, so we may need to swap bytes.
35+
using namespace llvm::support;
36+
return endian::read<uint64_t, little, unaligned>(Result);
37+
}
38+
39+
uint64_t ComputeHash(HashT Type, StringRef K) {
40+
switch (Type) {
41+
case HashT::MD5:
42+
return IndexedInstrProf::MD5Hash(K);
43+
}
44+
llvm_unreachable("Unhandled hash type");
45+
}
46+
47+
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
48+
const uint64_t Version = 1;
49+
const HashT HashType = HashT::MD5;
50+
}
51+
52+
} // end namespace llvm
53+
54+
#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_

lib/ProfileData/InstrProfReader.cpp

Lines changed: 112 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,30 +15,62 @@
1515
#include "llvm/ProfileData/InstrProfReader.h"
1616
#include "llvm/ProfileData/InstrProf.h"
1717

18+
#include "InstrProfIndexed.h"
19+
1820
#include <cassert>
1921

2022
using namespace llvm;
2123

22-
error_code InstrProfReader::create(std::string Path,
23-
std::unique_ptr<InstrProfReader> &Result) {
24-
std::unique_ptr<MemoryBuffer> Buffer;
24+
/// Load the profile at \p Path into \p Buffer using
/// MemoryBuffer::getFileOrSTDIN.
///
/// \returns the loader's error if it fails, instrprof_error::too_large if the
/// buffer size does not fit in an 'unsigned', and instrprof_error::success
/// otherwise.
static error_code setupMemoryBuffer(std::string Path,
                                    std::unique_ptr<MemoryBuffer> &Buffer) {
  error_code LoadError = MemoryBuffer::getFileOrSTDIN(Path, Buffer);
  if (LoadError)
    return LoadError;

  // Sanity check the file: anything that fits in an 'unsigned' is accepted.
  if (Buffer->getBufferSize() <= std::numeric_limits<unsigned>::max())
    return instrprof_error::success;
  return instrprof_error::too_large;
}
34+
35+
/// Prepare a newly constructed reader for use. At present this only reads
/// the header and reports whatever status that produces.
static error_code initializeReader(InstrProfReader &Reader) {
  error_code HeaderStatus = Reader.readHeader();
  return HeaderStatus;
}
38+
39+
/// Open the profile at \p Path and store a reader for its format in
/// \p Result.
///
/// The binary formats are probed first (indexed, then 64-bit raw, then
/// 32-bit raw); anything else is handed to the text reader.
///
/// \returns the error from loading the buffer, or otherwise the status of
/// initializing the chosen reader.
error_code InstrProfReader::create(std::string Path,
                                   std::unique_ptr<InstrProfReader> &Result) {
  // Set up the buffer to read.
  std::unique_ptr<MemoryBuffer> Contents;
  error_code LoadError = setupMemoryBuffer(Path, Contents);
  if (LoadError)
    return LoadError;

  // Pick the reader matching the detected format.
  std::unique_ptr<InstrProfReader> Reader;
  if (IndexedInstrProfReader::hasFormat(*Contents))
    Reader.reset(new IndexedInstrProfReader(std::move(Contents)));
  else if (RawInstrProfReader64::hasFormat(*Contents))
    Reader.reset(new RawInstrProfReader64(std::move(Contents)));
  else if (RawInstrProfReader32::hasFormat(*Contents))
    Reader.reset(new RawInstrProfReader32(std::move(Contents)));
  else
    Reader.reset(new TextInstrProfReader(std::move(Contents)));
  Result = std::move(Reader);

  // Initialize the reader and return the result.
  return initializeReader(*Result);
}
59+
60+
/// Open the profile at \p Path as an indexed profile and store the reader in
/// \p Result.
///
/// \returns the error from loading the buffer, instrprof_error::bad_magic if
/// the buffer is not in the indexed format (\p Result is left untouched in
/// both cases), or otherwise the status of initializing the reader.
error_code IndexedInstrProfReader::create(
    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
  // Set up the buffer to read.
  std::unique_ptr<MemoryBuffer> Contents;
  error_code LoadError = setupMemoryBuffer(Path, Contents);
  if (LoadError)
    return LoadError;

  // Only the indexed format is acceptable here.
  const bool IsIndexed = IndexedInstrProfReader::hasFormat(*Contents);
  if (!IsIndexed)
    return instrprof_error::bad_magic;

  // Create the reader, then initialize it and return the result.
  Result.reset(new IndexedInstrProfReader(std::move(Contents)));
  return initializeReader(*Result);
}
4375

4476
void InstrProfIterator::Increment() {
@@ -210,3 +242,77 @@ namespace llvm {
210242
template class RawInstrProfReader<uint32_t>;
211243
template class RawInstrProfReader<uint64_t>;
212244
}
245+
246+
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
247+
if (DataBuffer.getBufferSize() < 8)
248+
return false;
249+
using namespace support;
250+
uint64_t Magic =
251+
endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
252+
return Magic == IndexedInstrProf::Magic;
253+
}
254+
255+
/// Parse the header of an indexed profile and set up the record index.
///
/// The header consists of five little-endian 64-bit fields, in order: magic
/// number, format version, maximal function count, hash type, and the offset
/// of the hash table from the start of the buffer.
///
/// \returns success, or one of: truncated (buffer shorter than the header),
/// bad_magic, unsupported_version, unsupported_hash_type, or malformed (hash
/// table offset outside the buffer).
error_code IndexedInstrProfReader::readHeader() {
  using namespace support;

  // Magic, version, maximal function count, hash type and hash offset.
  const uint64_t HeaderSize = 5 * sizeof(uint64_t);

  const unsigned char *Start =
      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
  const unsigned char *Cur = Start;
  const uint64_t BufferSize = DataBuffer->getBufferSize();
  // Every header field is read unconditionally below, so the whole header
  // must be present before the first read.
  if (BufferSize < HeaderSize)
    return error(instrprof_error::truncated);

  // Check the magic number.
  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (Magic != IndexedInstrProf::Magic)
    return error(instrprof_error::bad_magic);

  // Read the version.
  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (Version != IndexedInstrProf::Version)
    return error(instrprof_error::unsupported_version);

  // Read the maximal function count.
  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);

  // Read the hash type. Validate the full 64-bit value before narrowing it
  // to the 32-bit enum; otherwise a value with only high bits set would be
  // truncated into a valid enumerator.
  uint64_t RawHashType = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (RawHashType > static_cast<uint64_t>(IndexedInstrProf::HashT::Last))
    return error(instrprof_error::unsupported_hash_type);
  IndexedInstrProf::HashT HashType =
      static_cast<IndexedInstrProf::HashT>(RawHashType);

  // Read the start offset of the hash table and make sure it points into
  // the buffer before handing the address to the table.
  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
  if (HashOffset >= BufferSize)
    return error(instrprof_error::malformed);

  // The rest of the file is an on disk hash table.
  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
                                           InstrProfLookupTrait(HashType)));
  // Set up our iterator for readNextRecord.
  RecordIterator = Index->data_begin();

  return success();
}
291+
292+
/// Look up \p FuncName in the index and copy out its profile data.
///
/// On success \p FuncHash receives the stored function hash and \p Counts the
/// stored counters; both are left untouched on failure.
///
/// \returns unknown_function if the name is not in the index, malformed if
/// the stored record decodes to an empty name, and success otherwise.
error_code IndexedInstrProfReader::getFunctionCounts(
    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
  const auto &Pos = Index->find(FuncName);
  if (Pos == Index->end())
    return error(instrprof_error::unknown_function);

  // An entry exists; only hand its contents back if it decoded to a record
  // with a non-empty name.
  const InstrProfRecord &Entry = *Pos;
  if (!Entry.Name.empty()) {
    FuncHash = Entry.Hash;
    Counts = Entry.Counts;
    return success();
  }
  return error(instrprof_error::malformed);
}
306+
307+
/// Store the next record of the profile in \p Record and advance.
///
/// \returns eof once every record has been visited, malformed if the record
/// decodes to an empty name (the iterator has still been advanced past it),
/// and success otherwise.
error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
  // Nothing left to hand out?
  const bool Exhausted = RecordIterator == Index->data_end();
  if (Exhausted)
    return error(instrprof_error::eof);

  // Fetch the current record and step past it before validating it.
  Record = *RecordIterator;
  ++RecordIterator;

  if (!Record.Name.empty())
    return success();
  return error(instrprof_error::malformed);
}

0 commit comments

Comments
 (0)