Skip to content

Commit 6d11baf

Browse files
committed
[mlgo] Stream the training data
This leverages the new logging format: since we no longer need to buffer the training data, we can write it out as it is produced.

Differential Revision: https://reviews.llvm.org/D142168
1 parent 3129bdc commit 6d11baf

File tree

6 files changed

+187
-299
lines changed

6 files changed

+187
-299
lines changed

llvm/include/llvm/Analysis/Utils/TrainingLogger.h

Lines changed: 25 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "llvm/Support/JSON.h"
6262

6363
#include <memory>
64+
#include <optional>
6465
#include <vector>
6566

6667
namespace llvm {
@@ -88,19 +89,18 @@ namespace llvm {
8889
/// Alternatively, don't call logReward at the end of each event, just
8990
/// log{Float|Int32|Int64}FinalReward at the end.
9091
class Logger final {
92+
std::unique_ptr<raw_ostream> OS;
9193
const std::vector<TensorSpec> FeatureSpecs;
9294
const TensorSpec RewardSpec;
9395
const bool IncludeReward;
94-
std::vector<std::unique_ptr<char[]>> FeatureStorage;
95-
std::vector<std::unique_ptr<char[]>> RewardStorage;
96-
raw_ostream &dumpHeader(raw_ostream &OS) const;
97-
raw_ostream &startContext(raw_ostream &OS, StringRef Name) const;
98-
raw_ostream &startObservation(raw_ostream &OS, size_t Nr) const;
99-
raw_ostream &writeOutcome(raw_ostream &OS, size_t CurrentObservationID) const;
100-
char *addNewTensor(size_t FeatureID);
101-
size_t getNrRecords() const;
96+
StringMap<size_t> ObservationIDs;
97+
std::string CurrentContext;
10298

103-
void logRewardImpl(const char *Value, size_t Size);
99+
void writeHeader();
100+
void writeTensor(const TensorSpec &Spec, const char *RawData) {
101+
OS->write(RawData, Spec.getTotalTensorBufferSize());
102+
}
103+
void logRewardImpl(const char *RawData);
104104

105105
public:
106106
/// Construct a Logger. If IncludeReward is false, then logReward or
@@ -109,44 +109,27 @@ class Logger final {
109109
/// NOTE: the FeatureSpecs are expected to be in the same order (i.e. have
110110
/// corresponding indices) with any MLModelRunner implementations
111111
/// corresponding to the model being trained/logged.
112-
Logger(const std::vector<TensorSpec> &FeatureSpecs,
113-
const TensorSpec &RewardSpec, bool IncludeReward)
114-
: FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
115-
IncludeReward(IncludeReward) {}
116-
117-
template <typename T> void logReward(T Value) {
118-
logRewardImpl(reinterpret_cast<const char *>(&Value), sizeof(T));
119-
}
120-
void logFloatReward(float Value);
121-
void logInt32Reward(int32_t Value);
122-
void logInt64Reward(int64_t Value);
123-
124-
void logFloatFinalReward(float Value);
125-
void logInt32FinalReward(int32_t Value);
126-
void logInt64FinalReward(int64_t Value);
112+
Logger(std::unique_ptr<raw_ostream> OS,
113+
const std::vector<TensorSpec> &FeatureSpecs,
114+
const TensorSpec &RewardSpec, bool IncludeReward);
127115

128-
void logFloatValue(size_t FeatureID, const float *Value);
129-
void logInt32Value(size_t FeatureID, const int32_t *Value);
130-
void logInt64Value(size_t FeatureID, const int64_t *Value);
116+
void switchContext(StringRef Name);
117+
void startObservation();
118+
void endObservation();
131119

132-
void logSpecifiedTensorValue(size_t FeatureID, const char *RawData);
120+
const std::string &currentContext() const { return CurrentContext; }
133121

134-
// Warning! For int32_t, the return is set up for int64_t, so the caller needs
135-
// to piecemeal cast their int32_t values.
136-
// FIXME: let's drop int32_t support. While it's supported by evaluator, it's
137-
// not supported by the tensorflow::SequenceExample proto. For small values,
138-
// we can consider using bytes.
139-
char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
122+
bool hasObservationInProgress() const {
123+
return ObservationIDs.find(CurrentContext) != ObservationIDs.end();
124+
}
140125

141-
// Flush the content of the log to the stream, clearing the stored data in the
142-
// process.
143-
raw_ostream &flush(raw_ostream &OS, bool WithHeader = true,
144-
StringRef Context = "default") const;
126+
template <typename T> void logReward(T Value) {
127+
logRewardImpl(reinterpret_cast<const char *>(&Value));
128+
}
145129

146-
// Flush a set of logs that are produced from the same module, e.g.
147-
// per-function regalloc traces.
148-
static void flushLogs(raw_ostream &OS,
149-
const StringMap<std::unique_ptr<Logger>> &Loggers);
130+
void logTensorValue(size_t FeatureID, const char *RawData) {
131+
writeTensor(FeatureSpecs[FeatureID], RawData);
132+
}
150133
};
151134

152135
} // namespace llvm

llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,6 @@ class TrainingLogger final {
102102
void logInlineEvent(const InlineEvent &Event,
103103
const MLModelRunner &ModelRunner);
104104

105-
/// Print the stored tensors.
106-
void print();
107-
108105
private:
109106
StringRef LogFileName;
110107
const ModelUnderTrainingRunner *const MUTR;
@@ -150,7 +147,6 @@ class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
150147

151148
size_t getTotalSizeEstimate();
152149

153-
virtual ~DevelopmentModeMLInlineAdvisor();
154150
void updateNativeSizeEstimate(int64_t Change) {
155151
*CurrentNativeSize += Change;
156152
}
@@ -288,45 +284,48 @@ TrainingLogger::TrainingLogger(StringRef LogFileName,
288284

289285
DecisionPos = FT.size();
290286
FT.push_back(TensorSpec::createSpec<int64_t>(DecisionName, {1}));
287+
std::error_code EC;
288+
auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
289+
if (EC)
290+
dbgs() << (EC.message() + ":" + TrainingLog);
291291

292292
L = std::make_unique<Logger>(
293-
FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
293+
std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
294294
InlineSizeEstimatorAnalysis::isEvaluatorRequested());
295+
L->switchContext("");
295296
}
296297

297298
/// Log one inlining event.
298299
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
299300
const MLModelRunner &ModelRunner) {
301+
L->startObservation();
300302
size_t CurrentFeature = 0;
301-
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
302-
int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
303-
L->logInt64Value(CurrentFeature, &F);
304-
}
303+
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature)
304+
L->logTensorValue(CurrentFeature,
305+
reinterpret_cast<const char *>(
306+
ModelRunner.getTensorUntyped(CurrentFeature)));
305307

306308
if (MUTR)
307309
for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
308310
const char *RawData =
309311
reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
310-
L->logSpecifiedTensorValue(CurrentFeature, RawData);
312+
L->logTensorValue(CurrentFeature, RawData);
311313
++CurrentFeature;
312314
}
313315

314316
assert(CurrentFeature == DefaultDecisionPos);
315-
L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
316-
L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
317+
L->logTensorValue(DefaultDecisionPos,
318+
reinterpret_cast<const char *>(&Event.DefaultDecision));
319+
L->logTensorValue(DecisionPos,
320+
reinterpret_cast<const char *>(&Event.AdvisedDecision));
321+
L->endObservation();
317322
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
318-
L->logInt64Reward(Event.Reward);
323+
L->logReward(Event.Reward);
319324

320325
// For debugging / later use
321326
Effects.push_back(Event.Effect);
322327
}
323328

324-
void TrainingLogger::print() {
325-
std::error_code EC;
326-
raw_fd_ostream OutFile(LogFileName, EC);
327-
L->flush(OutFile);
328-
}
329-
330329
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
331330
Module &M, ModuleAnalysisManager &MAM,
332331
std::unique_ptr<MLModelRunner> ModelRunner,
@@ -342,11 +341,6 @@ DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
342341
assert(IsDoingInference || isLogging());
343342
}
344343

345-
DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
346-
if (isLogging())
347-
Logger->print();
348-
}
349-
350344
std::optional<size_t>
351345
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
352346
if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())

llvm/lib/Analysis/TrainingLogger.cpp

Lines changed: 32 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ static cl::opt<bool>
3232
UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden,
3333
cl::desc("Output simple (non-protobuf) log."));
3434

35-
raw_ostream &Logger::dumpHeader(raw_ostream &OS) const {
36-
json::OStream JOS(OS);
35+
void Logger::writeHeader() {
36+
json::OStream JOS(*OS);
3737
JOS.object([&]() {
3838
JOS.attributeArray("features", [&]() {
3939
for (const auto &TS : FeatureSpecs)
@@ -45,140 +45,44 @@ raw_ostream &Logger::dumpHeader(raw_ostream &OS) const {
4545
JOS.attributeEnd();
4646
}
4747
});
48-
OS << "\n";
49-
return OS;
48+
*OS << "\n";
5049
}
5150

52-
raw_ostream &Logger::startContext(raw_ostream &OS, StringRef Name) const {
53-
json::OStream JOS(OS);
51+
void Logger::switchContext(StringRef Name) {
52+
CurrentContext = Name.str();
53+
json::OStream JOS(*OS);
5454
JOS.object([&]() { JOS.attribute("context", Name); });
55-
OS << "\n";
56-
return OS;
55+
*OS << "\n";
5756
}
5857

59-
raw_ostream &Logger::startObservation(raw_ostream &OS, size_t Nr) const {
60-
json::OStream JOS(OS);
61-
JOS.object([&]() { JOS.attribute("observation", static_cast<int64_t>(Nr)); });
62-
OS << "\n";
63-
return OS;
64-
}
65-
66-
raw_ostream &Logger::writeOutcome(raw_ostream &OS,
67-
size_t CurrentObservationID) const {
68-
if (IncludeReward) {
69-
OS << "\n";
70-
json::OStream JOS(OS);
71-
JOS.object([&]() {
72-
JOS.attribute("outcome", static_cast<int64_t>(CurrentObservationID));
73-
});
74-
OS << "\n";
75-
OS.write(RewardStorage[CurrentObservationID].get(),
76-
RewardSpec.getTotalTensorBufferSize());
77-
}
78-
OS << "\n";
79-
return OS;
80-
}
81-
82-
char *Logger::addNewTensor(size_t FeatureID) {
83-
return FeatureStorage
84-
.emplace_back(
85-
new char[FeatureSpecs[FeatureID].getTotalTensorBufferSize()])
86-
.get();
87-
}
88-
89-
size_t Logger::getNrRecords() const {
90-
assert(FeatureStorage.size() % FeatureSpecs.size() == 0);
91-
return FeatureStorage.size() / FeatureSpecs.size();
92-
}
93-
94-
void Logger::logRewardImpl(const char *Value, size_t Size) {
95-
std::memcpy(RewardStorage.emplace_back(new char[Size]).get(), Value, Size);
96-
}
97-
98-
raw_ostream &Logger::flush(raw_ostream &OS, bool WithHeader,
99-
StringRef Context) const {
100-
if (WithHeader)
101-
dumpHeader(OS);
102-
startContext(OS, Context);
103-
size_t CurrentObservationID = 0;
104-
for (size_t I = 0; I < FeatureStorage.size(); ++I) {
105-
size_t TensorID = I % FeatureSpecs.size();
106-
if (TensorID == 0) {
107-
CurrentObservationID = I / FeatureSpecs.size();
108-
startObservation(OS, CurrentObservationID);
109-
}
110-
OS.write(FeatureStorage[I].get(),
111-
FeatureSpecs[TensorID].getTotalTensorBufferSize());
112-
if (TensorID == FeatureSpecs.size() - 1) {
113-
writeOutcome(OS, CurrentObservationID);
114-
}
115-
}
116-
return OS;
117-
}
118-
119-
#define LOG_REWARD(NAME, TYPE) \
120-
void Logger::log##NAME##Reward(TYPE Value) { \
121-
assert(IncludeReward); \
122-
(void)IncludeReward; \
123-
logReward(Value); \
124-
}
125-
126-
LOG_REWARD(Float, float)
127-
LOG_REWARD(Int32, int32_t)
128-
LOG_REWARD(Int64, int64_t)
129-
#undef LOG_REWARD
130-
131-
#define LOG_FINAL_REWARD(NAME, TYPE) \
132-
void Logger::log##NAME##FinalReward(TYPE Value) { \
133-
assert(RewardSpec.isElementType<TYPE>()); \
134-
for (size_t I = 1; I < getNrRecords(); ++I) \
135-
log##NAME##Reward(0); \
136-
log##NAME##Reward(Value); \
137-
}
138-
139-
LOG_FINAL_REWARD(Float, float)
140-
LOG_FINAL_REWARD(Int32, int32_t)
141-
LOG_FINAL_REWARD(Int64, int64_t)
142-
#undef LOG_FINAL_REWARD
143-
144-
void Logger::logFloatValue(size_t FeatureID, const float *Value) {
145-
assert(FeatureSpecs[FeatureID].isElementType<float>());
146-
logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
147-
}
148-
149-
void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
150-
assert(FeatureSpecs[FeatureID].isElementType<int64_t>());
151-
logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
58+
void Logger::startObservation() {
59+
auto I = ObservationIDs.insert({CurrentContext, 0});
60+
size_t NewObservationID = I.second ? 0 : ++I.first->second;
61+
json::OStream JOS(*OS);
62+
JOS.object([&]() {
63+
JOS.attribute("observation", static_cast<int64_t>(NewObservationID));
64+
});
65+
*OS << "\n";
15266
}
15367

154-
void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
155-
assert(FeatureSpecs[FeatureID].isElementType<int32_t>());
156-
logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
157-
}
68+
void Logger::endObservation() { *OS << "\n"; }
15869

159-
void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
160-
const auto &Spec = FeatureSpecs[FeatureID];
161-
char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
162-
if (Spec.isElementType<int32_t>())
163-
for (size_t I = 0; I < Spec.getElementCount(); ++I)
164-
(reinterpret_cast<int64_t *>(Buff))[I] =
165-
static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
166-
else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
167-
std::memcpy(Buff, RawData,
168-
Spec.getElementCount() * Spec.getElementByteSize());
169-
else
170-
llvm_unreachable("Unsupported tensor type");
171-
}
172-
173-
char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
174-
return reinterpret_cast<char *>(addNewTensor(FeatureID));
70+
void Logger::logRewardImpl(const char *RawData) {
71+
assert(IncludeReward);
72+
json::OStream JOS(*OS);
73+
JOS.object([&]() {
74+
JOS.attribute("outcome", static_cast<int64_t>(
75+
ObservationIDs.find(CurrentContext)->second));
76+
});
77+
*OS << "\n";
78+
writeTensor(RewardSpec, RawData);
79+
*OS << "\n";
17580
}
17681

177-
void Logger::flushLogs(raw_ostream &OS,
178-
const StringMap<std::unique_ptr<Logger>> &Loggers) {
179-
bool IsFirst = true;
180-
for (const auto &NamedLogger : Loggers) {
181-
NamedLogger.second->flush(OS, IsFirst, NamedLogger.first());
182-
IsFirst = false;
183-
}
82+
Logger::Logger(std::unique_ptr<raw_ostream> OS,
83+
const std::vector<TensorSpec> &FeatureSpecs,
84+
const TensorSpec &RewardSpec, bool IncludeReward)
85+
: OS(std::move(OS)), FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
86+
IncludeReward(IncludeReward) {
87+
writeHeader();
18488
}

0 commit comments

Comments
 (0)