Skip to content

Commit 36bb1fb

Browse files
committed
[MLInliner] Factor out logging
Factored out the logging facility, to allow its reuse outside the inliner. Differential Revision: https://reviews.llvm.org/D88770
1 parent c3e07a0 commit 36bb1fb

File tree

5 files changed

+296
-141
lines changed

5 files changed

+296
-141
lines changed

llvm/include/llvm/Analysis/Utils/TFUtils.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,64 @@ class TensorSpec final {
100100
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
101101
const json::Value &Value);
102102

103+
/// Logging utility - given an ordered specification of features, and assuming
104+
/// a scalar reward, allow logging feature values and rewards, and then print
105+
/// as tf.train.SequenceExample text protobuf.
106+
/// The assumption is that, for an event to be logged (i.e. a set of feature
107+
/// values and a reward), the user calls the log* API for each feature exactly
108+
/// once, providing the index matching the position in the feature spec list
109+
/// provided at construction:
110+
/// event 0:
111+
/// logTensorValue(0, ...)
112+
/// logTensorValue(1, ...)
113+
/// ...
114+
/// logReward(...)
115+
/// event 1:
116+
/// logTensorValue(0, ...)
117+
/// logTensorValue(1, ...)
118+
/// ...
119+
/// logReward(...)
120+
///
121+
/// At the end, call print to generate the protobuf.
122+
class Logger final {
123+
public:
124+
struct LoggedFeatureSpec {
125+
TensorSpec Spec;
126+
Optional<std::string> LoggingName;
127+
};
128+
129+
/// Construct a Logger. If IncludeReward is false, then logReward shouldn't
130+
/// be called, and the reward feature won't be printed out.
131+
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
132+
const TensorSpec &RewardSpec, bool IncludeReward)
133+
: FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
134+
RawLogData(FeatureSpecs.size() + IncludeReward),
135+
IncludeReward(IncludeReward) {}
136+
137+
template <typename T> void logReward(T Value) {
138+
assert(IncludeReward);
139+
logTensorValue(RawLogData.size() - 1, &Value);
140+
}
141+
142+
template <typename T>
143+
void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
144+
const char *Start = reinterpret_cast<const char *>(Value);
145+
const char *End = Start + sizeof(T) * Size;
146+
RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
147+
}
148+
149+
void print(raw_ostream &OS);
150+
151+
private:
152+
std::vector<LoggedFeatureSpec> FeatureSpecs;
153+
TensorSpec RewardSpec;
154+
/// RawData has one entry per feature, plus one more for the reward.
155+
/// Each feature's values are then stored in a vector, in succession.
156+
/// This means the ith event is stored at [*][i]
157+
std::vector<std::vector<char>> RawLogData;
158+
const bool IncludeReward;
159+
};
160+
103161
class TFModelEvaluator final {
104162
public:
105163
/// The result of a model evaluation. Handles the lifetime of the output

llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp

Lines changed: 49 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,11 @@ namespace {
7474
/// An InlineEvent, used by TrainingLogger.
7575
struct InlineEvent {
7676
/// What the default policy's decision would have been.
77-
bool DefaultDecision = false;
77+
int64_t DefaultDecision = 0;
7878

7979
/// What we advised. When training off the default policy, this is the same as
8080
/// DefaultDecision.
81-
bool AdvisedDecision = false;
81+
int64_t AdvisedDecision = 0;
8282

8383
/// What actually happened. This would be 'false' in the case of an inline
8484
/// error, even if AdvisedDecision were true, otherwise it agrees with
@@ -109,91 +109,16 @@ class TrainingLogger final {
109109
void print();
110110

111111
private:
112-
/// Write the values of one tensor as a list.
113-
template <typename T>
114-
void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData,
115-
size_t ElemCount) const {
116-
OutFile << "[";
117-
const T *TypedData = reinterpret_cast<const T *>(TensorData);
118-
for (size_t I = 0; I < ElemCount; ++I) {
119-
if (I > 0)
120-
OutFile << ", ";
121-
OutFile << TypedData[I];
122-
}
123-
OutFile << "]";
124-
}
125-
126-
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
127-
/// The tensors are assumed to be stored contiguously, in row-major format,
128-
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
129-
/// feature name in the output is either the provided LoggingName, if
130-
/// specified, otherwise it's the name of the tensor (as given by Spec).
131-
template <typename T>
132-
void
133-
writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
134-
const T *TensorData, size_t TensorCount,
135-
Optional<StringRef> LoggingName = None) const {
136-
writeRawTensorsAsFeatureLists(OutFile, Spec,
137-
reinterpret_cast<const char *>(TensorData),
138-
TensorCount, LoggingName);
139-
}
140-
141-
/// Untyped implementation of the API above.
142-
void
143-
writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
144-
const char *TensorData, size_t TensorCount,
145-
Optional<StringRef> LoggingName = None) const {
146-
const char *FieldName = "<invalid>";
147-
std::function<void(const char *)> ValueWriter;
148-
// The 'Feature' protobuf only has 3 possible fields: float_list,
149-
// int64_list, or bytes_list, so we capture int32 values as int64. We don't
150-
// support any other types.
151-
if (Spec.isElementType<int64_t>()) {
152-
FieldName = "int64_list";
153-
ValueWriter = [&](const char *Data) {
154-
writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
155-
};
156-
} else if (Spec.isElementType<int32_t>()) {
157-
FieldName = "int64_list";
158-
ValueWriter = [&](const char *Data) {
159-
writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
160-
};
161-
162-
} else if (Spec.isElementType<float>()) {
163-
FieldName = "float_list";
164-
ValueWriter = [&](const char *Data) {
165-
writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
166-
};
167-
168-
} else
169-
llvm_unreachable("Unsupported tensor type.");
170-
171-
OutFile << " feature_list: {\n";
172-
OutFile << " key: "
173-
<< "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" ";
174-
OutFile << "value: {\n";
175-
size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
176-
for (const char *P = TensorData,
177-
*E = TensorData + TensorByteSize * TensorCount;
178-
P < E; P += TensorByteSize) {
179-
OutFile << " feature: { " << FieldName << ": { value: ";
180-
ValueWriter(P);
181-
OutFile << " } }\n";
182-
}
183-
OutFile << " }\n";
184-
OutFile << " }\n";
185-
}
186-
187112
StringRef LogFileName;
188113
const ModelUnderTrainingRunner *const MUTR;
189-
std::vector<InlineFeatures> Features;
190-
std::vector<int64_t> DefaultDecisions;
191-
// We store all outputs as data blobs, but we always expect to have one, the
192-
// first one, representing the decision. While we could track that separately,
193-
// for uniformity, we store it, generically, here.
194-
std::vector<std::vector<char>> Outputs;
114+
std::unique_ptr<Logger> L;
195115
std::vector<bool> Effects;
196-
std::vector<int64_t> Rewards;
116+
/// There's at least one output. We'll set this to a different value if MUTR
117+
/// is available.
118+
size_t OutputCount = 1;
119+
/// Set these 2 clearly OOB, to make sure we set them later.
120+
size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
121+
size_t DecisionPos = std::numeric_limits<size_t>::max();
197122
};
198123

199124
/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
@@ -331,8 +256,8 @@ class LoggingMLInlineAdvice : public MLInlineAdvice {
331256
TrainingLogger &Logger;
332257
const Optional<size_t> CallerSizeEstimateBefore;
333258
const Optional<size_t> CalleeSizeEstimateBefore;
334-
const bool DefaultDecision;
335-
const bool Mandatory;
259+
const int64_t DefaultDecision;
260+
const int64_t Mandatory;
336261
};
337262

338263
/// A pseudo model runner. We use it to store feature values when collecting
@@ -402,69 +327,62 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
402327
TrainingLogger::TrainingLogger(StringRef LogFileName,
403328
const ModelUnderTrainingRunner *MUTR)
404329
: LogFileName(LogFileName), MUTR(MUTR) {
330+
// The first output is the inlining decision.
331+
if (MUTR)
332+
OutputCount = MUTR->outputSpecs().size();
333+
std::vector<Logger::LoggedFeatureSpec> FT;
334+
405335
for (size_t I = 0; I < NumberOfFeatures; ++I)
406-
Features.push_back(InlineFeatures());
336+
FT.push_back(
337+
{TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
338+
for (size_t I = 1; I < OutputCount; ++I)
339+
FT.push_back({MUTR->outputSpecs()[I], MUTR->outputNames()[I]});
407340

408-
// The first output is the inlining decision.
409-
auto OutputCount = MUTR ? MUTR->outputSpecs().size() : 1;
410-
Outputs.assign(OutputCount, std::vector<char>());
341+
DefaultDecisionPos = FT.size();
342+
FT.push_back(
343+
{TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});
344+
345+
DecisionPos = FT.size();
346+
FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});
347+
348+
L = std::make_unique<Logger>(
349+
FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
350+
InlineSizeEstimatorAnalysis::isEvaluatorRequested());
411351
}
412352

413353
/// Log one inlining event.
414354
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
415355
const MLModelRunner &ModelRunner) {
416-
for (size_t I = 0; I < NumberOfFeatures; ++I)
417-
Features[I].push_back(ModelRunner.getFeature(I));
356+
size_t CurrentFeature = 0;
357+
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
358+
int64_t F = ModelRunner.getFeature(CurrentFeature);
359+
L->logTensorValue(CurrentFeature, &F);
360+
}
418361

419-
Effects.push_back(Event.Effect);
420-
Rewards.push_back(Event.Reward);
421-
DefaultDecisions.push_back(Event.DefaultDecision);
422-
int64_t Advice = static_cast<int64_t>(Event.AdvisedDecision);
423-
const char *AdviceData = reinterpret_cast<const char *>(&Advice);
424-
Outputs[0].insert(Outputs[0].end(), AdviceData, AdviceData + sizeof(int64_t));
425-
for (size_t I = 1; I < Outputs.size(); ++I) {
362+
for (size_t I = 1; I < OutputCount; ++I) {
426363
const auto &Result = *MUTR->lastEvaluationResult();
427364
auto &Spec = MUTR->outputSpecs()[I];
428365
const char *RawData =
429366
reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
430-
Outputs[I].insert(Outputs[I].end(), RawData,
431-
RawData +
432-
Spec.getElementCount() * Spec.getElementByteSize());
367+
L->logTensorValue(CurrentFeature, RawData,
368+
Spec.getElementCount() * Spec.getElementByteSize());
369+
++CurrentFeature;
433370
}
371+
372+
assert(CurrentFeature == DefaultDecisionPos);
373+
L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
374+
L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
375+
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
376+
L->logReward(Event.Reward);
377+
378+
// For debugging / later use
379+
Effects.push_back(Event.Effect);
434380
}
435381

436382
void TrainingLogger::print() {
437383
std::error_code EC;
438384
raw_fd_ostream OutFile(LogFileName, EC);
439-
size_t NumberOfRecords = Rewards.size();
440-
if (NumberOfRecords == 0)
441-
return;
442-
443-
OutFile << "feature_lists: {\n";
444-
for (size_t I = 0; I < Features.size(); ++I)
445-
writeTensorsAsFeatureLists(
446-
OutFile, TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}),
447-
Features[I].data(), NumberOfRecords);
448-
449-
writeTensorsAsFeatureLists(
450-
OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
451-
DefaultDecisions.data(), NumberOfRecords);
452-
453-
writeRawTensorsAsFeatureLists(
454-
OutFile, TensorSpec::createSpec<int64_t>(DecisionName, {1}),
455-
Outputs[0].data(), NumberOfRecords);
456-
457-
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
458-
writeTensorsAsFeatureLists(OutFile,
459-
TensorSpec::createSpec<int64_t>(RewardName, {1}),
460-
Rewards.data(), NumberOfRecords);
461-
462-
for (size_t I = 1; I < Outputs.size(); ++I)
463-
writeRawTensorsAsFeatureLists(OutFile, MUTR->outputSpecs()[I],
464-
Outputs[I].data(), NumberOfRecords,
465-
StringRef(MUTR->outputNames()[I]));
466-
467-
OutFile << "}\n";
385+
L->print(OutFile);
468386
}
469387

470388
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(

0 commit comments

Comments
 (0)