Skip to content

Commit 36bb1fb

Browse files
committed
[MLInliner] Factor out logging
Factored out the logging facility, to allow its reuse outside the inliner. Differential Revision: https://reviews.llvm.org/D88770
1 parent c3e07a0 commit 36bb1fb

File tree

5 files changed

+296
-141
lines changed

5 files changed

+296
-141
lines changed

llvm/include/llvm/Analysis/Utils/TFUtils.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,64 @@ class TensorSpec final {
100100
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
101101
const json::Value &Value);
102102

103+
/// Logging utility - given an ordered specification of features, and assuming
104+
/// a scalar reward, allow logging feature values and rewards, and then print
105+
/// as tf.train.SequenceExample text protobuf.
106+
/// The assumption is that, for an event to be logged (i.e. a set of feature
107+
/// values and a reward), the user calls the log* API for each feature exactly
108+
/// once, providing the index matching the position in the feature spec list
109+
/// provided at construction:
110+
/// event 0:
111+
/// logTensorValue(0, ...)
112+
/// logTensorValue(1, ...)
113+
/// ...
114+
/// logReward(...)
115+
/// event 1:
116+
/// logTensorValue(0, ...)
117+
/// logTensorValue(1, ...)
118+
/// ...
119+
/// logReward(...)
120+
///
121+
/// At the end, call print to generate the protobuf.
122+
class Logger final {
123+
public:
124+
struct LoggedFeatureSpec {
125+
TensorSpec Spec;
126+
Optional<std::string> LoggingName;
127+
};
128+
129+
/// Construct a Logger. If IncludeReward is false, then logReward shouldn't
130+
/// be called, and the reward feature won't be printed out.
131+
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
132+
const TensorSpec &RewardSpec, bool IncludeReward)
133+
: FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
134+
RawLogData(FeatureSpecs.size() + IncludeReward),
135+
IncludeReward(IncludeReward) {}
136+
137+
template <typename T> void logReward(T Value) {
138+
assert(IncludeReward);
139+
logTensorValue(RawLogData.size() - 1, &Value);
140+
}
141+
142+
template <typename T>
143+
void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
144+
const char *Start = reinterpret_cast<const char *>(Value);
145+
const char *End = Start + sizeof(T) * Size;
146+
RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
147+
}
148+
149+
void print(raw_ostream &OS);
150+
151+
private:
152+
std::vector<LoggedFeatureSpec> FeatureSpecs;
153+
TensorSpec RewardSpec;
154+
/// RawData has one entry per feature, plus one more for the reward.
155+
/// Each feature's values are then stored in a vector, in succession.
156+
/// This means the ith event is stored at [*][i]
157+
std::vector<std::vector<char>> RawLogData;
158+
const bool IncludeReward;
159+
};
160+
103161
class TFModelEvaluator final {
104162
public:
105163
/// The result of a model evaluation. Handles the lifetime of the output

llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp

Lines changed: 49 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,11 @@ namespace {
7474
/// An InlineEvent, used by TrainingLogger.
7575
struct InlineEvent {
7676
/// What the default policy's decision would have been.
77-
bool DefaultDecision = false;
77+
int64_t DefaultDecision = 0;
7878

7979
/// What we advised. When training off the default policy, this is the same as
8080
/// DefaultDecision.
81-
bool AdvisedDecision = false;
81+
int64_t AdvisedDecision = 0;
8282

8383
/// What actually happened. This would be 'false' in the case of an inline
8484
/// error, even if AdvisedDecision were true, otherwise it agrees with
@@ -109,91 +109,16 @@ class TrainingLogger final {
109109
void print();
110110

111111
private:
112-
/// Write the values of one tensor as a list.
113-
template <typename T>
114-
void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData,
115-
size_t ElemCount) const {
116-
OutFile << "[";
117-
const T *TypedData = reinterpret_cast<const T *>(TensorData);
118-
for (size_t I = 0; I < ElemCount; ++I) {
119-
if (I > 0)
120-
OutFile << ", ";
121-
OutFile << TypedData[I];
122-
}
123-
OutFile << "]";
124-
}
125-
126-
/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
127-
/// The tensors are assumed to be stored contiguously, in row-major format,
128-
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
129-
/// feature name in the output is either the provided LoggingName, if
130-
/// specified, otherwise it's the name of the tensor (as given by Spec).
131-
template <typename T>
132-
void
133-
writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
134-
const T *TensorData, size_t TensorCount,
135-
Optional<StringRef> LoggingName = None) const {
136-
writeRawTensorsAsFeatureLists(OutFile, Spec,
137-
reinterpret_cast<const char *>(TensorData),
138-
TensorCount, LoggingName);
139-
}
140-
141-
/// Untyped implementation of the API above.
142-
void
143-
writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
144-
const char *TensorData, size_t TensorCount,
145-
Optional<StringRef> LoggingName = None) const {
146-
const char *FieldName = "<invalid>";
147-
std::function<void(const char *)> ValueWriter;
148-
// The 'Feature' protobuf only has 3 possible fields: float_list,
149-
// int64_list, or bytes_list, so we capture int32 values as int64. We don't
150-
// support any other types.
151-
if (Spec.isElementType<int64_t>()) {
152-
FieldName = "int64_list";
153-
ValueWriter = [&](const char *Data) {
154-
writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
155-
};
156-
} else if (Spec.isElementType<int32_t>()) {
157-
FieldName = "int64_list";
158-
ValueWriter = [&](const char *Data) {
159-
writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
160-
};
161-
162-
} else if (Spec.isElementType<float>()) {
163-
FieldName = "float_list";
164-
ValueWriter = [&](const char *Data) {
165-
writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
166-
};
167-
168-
} else
169-
llvm_unreachable("Unsupported tensor type.");
170-
171-
OutFile << " feature_list: {\n";
172-
OutFile << " key: "
173-
<< "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" ";
174-
OutFile << "value: {\n";
175-
size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
176-
for (const char *P = TensorData,
177-
*E = TensorData + TensorByteSize * TensorCount;
178-
P < E; P += TensorByteSize) {
179-
OutFile << " feature: { " << FieldName << ": { value: ";
180-
ValueWriter(P);
181-
OutFile << " } }\n";
182-
}
183-
OutFile << " }\n";
184-
OutFile << " }\n";
185-
}
186-
187112
StringRef LogFileName;
188113
const ModelUnderTrainingRunner *const MUTR;
189-
std::vector<InlineFeatures> Features;
190-
std::vector<int64_t> DefaultDecisions;
191-
// We store all outputs as data blobs, but we always expect to have one, the
192-
// first one, representing the decision. While we could track that separately,
193-
// for uniformity, we store it, generically, here.
194-
std::vector<std::vector<char>> Outputs;
114+
std::unique_ptr<Logger> L;
195115
std::vector<bool> Effects;
196-
std::vector<int64_t> Rewards;
116+
/// There's at least one output. We'll set this to a different value if MUTR
117+
/// is available.
118+
size_t OutputCount = 1;
119+
/// Set these 2 clearly OOB, to make sure we set them later.
120+
size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
121+
size_t DecisionPos = std::numeric_limits<size_t>::max();
197122
};
198123

199124
/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
@@ -331,8 +256,8 @@ class LoggingMLInlineAdvice : public MLInlineAdvice {
331256
TrainingLogger &Logger;
332257
const Optional<size_t> CallerSizeEstimateBefore;
333258
const Optional<size_t> CalleeSizeEstimateBefore;
334-
const bool DefaultDecision;
335-
const bool Mandatory;
259+
const int64_t DefaultDecision;
260+
const int64_t Mandatory;
336261
};
337262

338263
/// A pseudo model runner. We use it to store feature values when collecting
@@ -402,69 +327,62 @@ class ModelUnderTrainingRunner final : public MLModelRunner {
402327
TrainingLogger::TrainingLogger(StringRef LogFileName,
403328
const ModelUnderTrainingRunner *MUTR)
404329
: LogFileName(LogFileName), MUTR(MUTR) {
330+
// The first output is the inlining decision.
331+
if (MUTR)
332+
OutputCount = MUTR->outputSpecs().size();
333+
std::vector<Logger::LoggedFeatureSpec> FT;
334+
405335
for (size_t I = 0; I < NumberOfFeatures; ++I)
406-
Features.push_back(InlineFeatures());
336+
FT.push_back(
337+
{TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
338+
for (size_t I = 1; I < OutputCount; ++I)
339+
FT.push_back({MUTR->outputSpecs()[I], MUTR->outputNames()[I]});
407340

408-
// The first output is the inlining decision.
409-
auto OutputCount = MUTR ? MUTR->outputSpecs().size() : 1;
410-
Outputs.assign(OutputCount, std::vector<char>());
341+
DefaultDecisionPos = FT.size();
342+
FT.push_back(
343+
{TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});
344+
345+
DecisionPos = FT.size();
346+
FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});
347+
348+
L = std::make_unique<Logger>(
349+
FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
350+
InlineSizeEstimatorAnalysis::isEvaluatorRequested());
411351
}
412352

413353
/// Log one inlining event.
414354
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
415355
const MLModelRunner &ModelRunner) {
416-
for (size_t I = 0; I < NumberOfFeatures; ++I)
417-
Features[I].push_back(ModelRunner.getFeature(I));
356+
size_t CurrentFeature = 0;
357+
for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
358+
int64_t F = ModelRunner.getFeature(CurrentFeature);
359+
L->logTensorValue(CurrentFeature, &F);
360+
}
418361

419-
Effects.push_back(Event.Effect);
420-
Rewards.push_back(Event.Reward);
421-
DefaultDecisions.push_back(Event.DefaultDecision);
422-
int64_t Advice = static_cast<int64_t>(Event.AdvisedDecision);
423-
const char *AdviceData = reinterpret_cast<const char *>(&Advice);
424-
Outputs[0].insert(Outputs[0].end(), AdviceData, AdviceData + sizeof(int64_t));
425-
for (size_t I = 1; I < Outputs.size(); ++I) {
362+
for (size_t I = 1; I < OutputCount; ++I) {
426363
const auto &Result = *MUTR->lastEvaluationResult();
427364
auto &Spec = MUTR->outputSpecs()[I];
428365
const char *RawData =
429366
reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
430-
Outputs[I].insert(Outputs[I].end(), RawData,
431-
RawData +
432-
Spec.getElementCount() * Spec.getElementByteSize());
367+
L->logTensorValue(CurrentFeature, RawData,
368+
Spec.getElementCount() * Spec.getElementByteSize());
369+
++CurrentFeature;
433370
}
371+
372+
assert(CurrentFeature == DefaultDecisionPos);
373+
L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
374+
L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
375+
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
376+
L->logReward(Event.Reward);
377+
378+
// For debugging / later use
379+
Effects.push_back(Event.Effect);
434380
}
435381

436382
void TrainingLogger::print() {
437383
std::error_code EC;
438384
raw_fd_ostream OutFile(LogFileName, EC);
439-
size_t NumberOfRecords = Rewards.size();
440-
if (NumberOfRecords == 0)
441-
return;
442-
443-
OutFile << "feature_lists: {\n";
444-
for (size_t I = 0; I < Features.size(); ++I)
445-
writeTensorsAsFeatureLists(
446-
OutFile, TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}),
447-
Features[I].data(), NumberOfRecords);
448-
449-
writeTensorsAsFeatureLists(
450-
OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
451-
DefaultDecisions.data(), NumberOfRecords);
452-
453-
writeRawTensorsAsFeatureLists(
454-
OutFile, TensorSpec::createSpec<int64_t>(DecisionName, {1}),
455-
Outputs[0].data(), NumberOfRecords);
456-
457-
if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
458-
writeTensorsAsFeatureLists(OutFile,
459-
TensorSpec::createSpec<int64_t>(RewardName, {1}),
460-
Rewards.data(), NumberOfRecords);
461-
462-
for (size_t I = 1; I < Outputs.size(); ++I)
463-
writeRawTensorsAsFeatureLists(OutFile, MUTR->outputSpecs()[I],
464-
Outputs[I].data(), NumberOfRecords,
465-
StringRef(MUTR->outputNames()[I]));
466-
467-
OutFile << "}\n";
385+
L->print(OutFile);
468386
}
469387

470388
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(

0 commit comments

Comments
 (0)