Skip to content

Commit 9bb5556

Browse files
kyulee-comKyungwoo Lee
andauthored
Reland [CGData] llvm-cgdata #89884 (#101461)
Reland [CGData] llvm-cgdata #89884 using `Opt` instead of `cl` - Action options are required, `--convert`, `--show`, `--merge`. This was similar to sub-commands previously implemented, but having a prefix `--`. - `--format` option is added, which specifies `text` or `binary`. --------- Co-authored-by: Kyungwoo Lee <[email protected]>
1 parent 254da5a commit 9bb5556

30 files changed

+1871
-15
lines changed
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
//===- CodeGenData.h --------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains support for codegen data that has stable summary which
10+
// can be used to optimize the code in the subsequent codegen.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_CGDATA_CODEGENDATA_H
15+
#define LLVM_CGDATA_CODEGENDATA_H
16+
17+
#include "llvm/ADT/BitmaskEnum.h"
18+
#include "llvm/Bitcode/BitcodeReader.h"
19+
#include "llvm/CGData/OutlinedHashTree.h"
20+
#include "llvm/CGData/OutlinedHashTreeRecord.h"
21+
#include "llvm/IR/Module.h"
22+
#include "llvm/Object/ObjectFile.h"
23+
#include "llvm/Support/ErrorHandling.h"
24+
#include "llvm/TargetParser/Triple.h"
25+
#include <mutex>
26+
27+
namespace llvm {
28+
29+
/// Kinds of codegen data sections. The enumerators are generated from the
/// CG_DATA_SECT_ENTRY entries in CodeGenData.inc; only the first macro
/// argument (the kind name) is used to build this enum.
enum CGDataSectKind {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/CGData/CodeGenData.inc"
};
33+
34+
/// Returns the section name for the codegen data section kind \p CGSK under
/// the object file format \p OF.
/// NOTE(review): \p AddSegmentInfo (default true) presumably controls whether
/// a segment prefix is included in the returned name -- confirm against the
/// out-of-line definition.
std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
                                      Triple::ObjectFormatType OF,
                                      bool AddSegmentInfo = true);
37+
38+
enum class CGDataKind {
39+
Unknown = 0x0,
40+
// A function outlining info.
41+
FunctionOutlinedHashTree = 0x1,
42+
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
43+
};
44+
45+
/// Returns the std::error_category that cgdata_error values are reported
/// under; make_error_code(cgdata_error) pairs each enum value with it.
const std::error_category &cgdata_category();
46+
47+
/// Error conditions for codegen data. Enumerators are numbered sequentially
/// starting at success == 0; the numeric value is what make_error_code()
/// stores in the resulting std::error_code.
enum class cgdata_error {
  success = 0,
  eof,
  bad_magic,
  bad_header,
  empty_cgdata,
  malformed,
  unsupported_version,
};
56+
57+
/// Builds a std::error_code from \p E: the code is the enumerator's integer
/// value and the category is cgdata_category().
inline std::error_code make_error_code(cgdata_error E) {
  const int Value = static_cast<int>(E);
  return std::error_code(Value, cgdata_category());
}
60+
61+
/// An ErrorInfo subclass that carries a cgdata_error code together with an
/// optional message string.
class CGDataError : public ErrorInfo<CGDataError> {
public:
  /// Creates an error for \p Err. \p ErrStr is converted to an owned
  /// std::string at construction. \p Err must not be cgdata_error::success
  /// (checked by assertion).
  CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
      : Err(Err), Msg(ErrStr.str()) {
    assert(Err != cgdata_error::success && "Not an error");
  }

  /// Returns the description of this error (defined out of line).
  std::string message() const override;

  /// Writes message() to \p OS.
  void log(raw_ostream &OS) const override { OS << message(); }

  /// Maps this error to a std::error_code via make_error_code().
  std::error_code convertToErrorCode() const override {
    return make_error_code(Err);
  }

  /// Returns the stored error enum value.
  cgdata_error get() const { return Err; }

  /// Returns the message string supplied at construction (may be empty).
  const std::string &getMessage() const { return Msg; }

  /// Consume an Error and return the raw enum value contained within it, and
  /// the optional error message. The Error must either be a success value, or
  /// contain a single CGDataError.
  static std::pair<cgdata_error, std::string> take(Error E) {
    // Starts out as "success with no message"; the handler overwrites both
    // fields if a CGDataError is present.
    std::pair<cgdata_error, std::string> Result(cgdata_error::success,
                                                std::string());
    handleAllErrors(std::move(E), [&Result](const CGDataError &CGDE) {
      assert(Result.first == cgdata_error::success &&
             "Multiple errors encountered");
      Result.first = CGDE.get();
      Result.second = CGDE.getMessage();
    });
    return Result;
  }

  static char ID;

private:
  cgdata_error Err;
  std::string Msg;
};
99+
100+
/// Codegen data modes. Enumerators take the implicit values None == 0,
/// Read == 1 and Write == 2.
enum CGDataMode {
  None,
  Read,
  Write,
};
105+
106+
class CodeGenData {
107+
/// Global outlined hash tree that has oulined hash sequences across modules.
108+
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
109+
110+
/// This flag is set when -fcodegen-data-generate is passed.
111+
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
112+
bool EmitCGData;
113+
114+
/// This is a singleton instance which is thread-safe. Unlike profile data
115+
/// which is largely function-based, codegen data describes the whole module.
116+
/// Therefore, this can be initialized once, and can be used across modules
117+
/// instead of constructing the same one for each codegen backend.
118+
static std::unique_ptr<CodeGenData> Instance;
119+
static std::once_flag OnceFlag;
120+
121+
CodeGenData() = default;
122+
123+
public:
124+
~CodeGenData() = default;
125+
126+
static CodeGenData &getInstance();
127+
128+
/// Returns true if we have a valid outlined hash tree.
129+
bool hasOutlinedHashTree() {
130+
return PublishedHashTree && !PublishedHashTree->empty();
131+
}
132+
133+
/// Returns the outlined hash tree. This can be globally used in a read-only
134+
/// manner.
135+
const OutlinedHashTree *getOutlinedHashTree() {
136+
return PublishedHashTree.get();
137+
}
138+
139+
/// Returns true if we should write codegen data.
140+
bool emitCGData() { return EmitCGData; }
141+
142+
/// Publish the (globally) merged or read outlined hash tree.
143+
void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
144+
PublishedHashTree = std::move(HashTree);
145+
// Ensure we disable emitCGData as we do not want to read and write both.
146+
EmitCGData = false;
147+
}
148+
};
149+
150+
namespace cgdata {
151+
152+
inline bool hasOutlinedHashTree() {
153+
return CodeGenData::getInstance().hasOutlinedHashTree();
154+
}
155+
156+
inline const OutlinedHashTree *getOutlinedHashTree() {
157+
return CodeGenData::getInstance().getOutlinedHashTree();
158+
}
159+
160+
inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
161+
162+
inline void
163+
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164+
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165+
}
166+
167+
void warn(Error E, StringRef Whence = "");
168+
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
169+
170+
} // end namespace cgdata
171+
172+
namespace IndexedCGData {
173+
174+
// A signature for data validation. Read as little-endian bytes, the value
// spells "\xffcgdata\x81": 0xff 'c' 'g' 'd' 'a' 't' 'a' 0x81.
const uint64_t Magic = 0x81617461646763ff;
177+
178+
enum CGDataVersion {
179+
// Version 1 is the first version. This version supports the outlined
180+
// hash tree.
181+
Version1 = 1,
182+
CurrentVersion = CG_DATA_INDEX_VERSION
183+
};
184+
const uint64_t Version = CGDataVersion::CurrentVersion;
185+
186+
struct Header {
187+
uint64_t Magic;
188+
uint32_t Version;
189+
uint32_t DataKind;
190+
uint64_t OutlinedHashTreeOffset;
191+
192+
// New fields should only be added at the end to ensure that the size
193+
// computation is correct. The methods below need to be updated to ensure that
194+
// the new field is read correctly.
195+
196+
// Reads a header struct from the buffer.
197+
static Expected<Header> readFromBuffer(const unsigned char *Curr);
198+
};
199+
200+
} // end namespace IndexedCGData
201+
202+
} // end namespace llvm
203+
204+
#endif // LLVM_CGDATA_CODEGENDATA_H
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\
2+
|*
3+
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
|* See https://llvm.org/LICENSE.txt for license information.
5+
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
|*
7+
\*===----------------------------------------------------------------------===*/
8+
/*
 * This is the main file that defines all the data structures, signatures, and
 * constant literals that are shared across the compiler and host tools
 * (reader/writer) to support codegen data.
 *
\*===----------------------------------------------------------------------===*/
14+
15+
/* Helper macros.
 * CG_DATA_SIMPLE_QUOTE(x) turns x into a string literal exactly as written.
 * CG_DATA_QUOTE(x) macro-expands x first and then stringizes the result.
 */
#define CG_DATA_SIMPLE_QUOTE(x) #x
#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x)
18+
19+
/* Section table. If the including file has defined
 * CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix), it is
 * expanded once per codegen data section and then undefined.
 * NOTE(review): CG_DATA_OUTLINE_COMMON and CG_DATA_OUTLINE_COFF are defined
 * further down in this file, so a consumer that actually uses the section-name
 * arguments relies on those macros already being defined by an earlier
 * inclusion -- confirm the intended inclusion order.
 */
#ifdef CG_DATA_SECT_ENTRY
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
                   CG_DATA_OUTLINE_COFF, "__DATA,")

#undef CG_DATA_SECT_ENTRY
#endif
26+
27+
/* Section name common to all targets other than WIN32. This is a bare token;
 * wrap it in CG_DATA_QUOTE() to obtain a string literal. */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
/* COFF section name (already a string literal).
 * Since cg data sections are not allocated, we don't need to
 * access them at runtime.
 */
#define CG_DATA_OUTLINE_COFF ".loutline"
34+
35+
/* Runtime section names and name strings: CG_DATA_SECT_NAME is the COFF name
 * when _WIN32 is defined, and the quoted common name otherwise. */
#if defined(_WIN32)
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
#else
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
#endif
44+
45+
/* Indexed codegen data format version. Numbering starts from 1. */
#define CG_DATA_INDEX_VERSION 1

0 commit comments

Comments
 (0)