@@ -62,6 +62,10 @@ struct GraphGlobalConfiguration {
static const char DEVICE_TPU_REPLICATED_CORE[] = "TPU_REPLICATED_CORE";
static const char DEVICE_TPU_SYSTEM[] = "TPU_SYSTEM";
+ // Set a small number to exercise the bounded queue capacity more, increasing
+ // test coverage.
+ // FIXME: Tune the default value for performance, and/or make it configurable.
+ static const int NAMED_TENSOR_QUEUE_CAPACITY = 1;

/// When generating a TF TPU graph, call this function to place an eligible TF
/// graph node onto TPU device. Some nodes such as Placeholder and
@@ -142,6 +146,12 @@ namespace {
/// This is a list of all of the operations that make up this function.
std::vector<const TF_Operation*> operations;

+ // When true, lower effectful ops (e.g. Swift->TF send ops), if any, in the
+ // corresponding TF function. Currently in a While op context, these ops
+ // should not be run in the cond function.
+ // TODO(b/78472806): Add a more thorough and proper fix for effectful ops in
+ // the cond function.
+ bool shouldLowerEffectfulOps = true;
public:
GraphFunctionBody(GraphGlobalConfiguration configuration)
: configuration(configuration), graph(TF_NewGraph(), &TF_DeleteGraph) {}
@@ -173,6 +183,21 @@ namespace {
return result;
}
+
+ // If there is a control dependence value, run it before producing an output
+ // tensor in GraphFunctionBody.
+ TF_Output maybeRunEffectfulOp(TF_Output result, TF_Status *status) {
+ if (!controlDependenceValue) return result;
+
+ std::string nodeName = "RunControlDependency";
+ auto *desc = TF_NewOperation(getGraph(), "Identity", nodeName.c_str());
+ TF_AddControlInput(desc, controlDependenceValue);
+ TF_AddInput(desc, result);
+ TF_Operation *newResult = finishOp(desc, /*hasSideEffects*/ false,
+ /*isEligibleForTPU*/ false, status);
+ controlDependenceValue = nullptr;
+ return {newResult, 0};
+ }
};
}
@@ -194,6 +219,10 @@ struct TFGraphLowering : public SILInstructionVisitor<TFGraphLowering> {
// a value corresponds to, along with the scope ID of the value.
ValueMappingScopedHashTable valueMapping;

+ // Track those tensor ids that have been lowered to graph ops for TF->Swift
+ // tensor sends.
+ llvm::SmallSet<int, 4> processedTensorIdsForSend;
+
/// This flag gets set if lowering code to the graph produces a TensorFlow
/// error and emits a diagnostic. This tells us to stop lowering and give up
/// gracefully.
@@ -458,10 +487,8 @@ struct TFGraphLowering : public SILInstructionVisitor<TFGraphLowering> {
void visitStringLiteralInst(StringLiteralInst *inst) {}

void visitBuiltinInst(BuiltinInst *inst);
- void visitBuiltinTFSendInst(BuiltinInst *inst) {
- internalError(inst->getLoc(),
- "GraphGen cannot lower a 'send' to the host yet");
- }
+ void visitBuiltinTFSendInst(BuiltinInst *inst);
+
void visitBuiltinTFReceiveInst(BuiltinInst *inst) {
internalError(inst->getLoc(),
"GraphGen cannot lower a 'receive' from the host yet");
@@ -751,6 +778,99 @@ static void decodeShapeArray(SILInstruction *inst,
}
}

+ void TFGraphLowering::visitBuiltinTFSendInst(BuiltinInst *inst) {
+ auto &graphFn = getCurrentGraphFunction();
+ // TODO(b/78472806): Add a more thorough and proper fix for effectful ops in
+ // the while cond function.
+ if (!graphFn.shouldLowerEffectfulOps) return;
+
+ // Decode the tensor id from the builtin name.
+ // Example: builtin "tensorflowSend_0"<TensorHandle<Float>>(...) : $()
+ int tensorId = -1;
+ {
+ auto name = inst->getName().str();
+ auto tensorIdStr = name.substr(strlen("tensorflowSend_"));
+ bool isInt = llvm::to_integer(tensorIdStr, tensorId, 10);
+ assert(isInt);
+ }
+
+ // Type check and process the parameter.
+ TF_Output inputOp;
+ TF_DataType inputType;
+ {
+ assert(inst->getNumOperands() == 1);
+ auto operand = inst->getOperand(0);
+ inputOp = getOperandValue(operand);
+ if (!inputOp.oper) return; // Error occurred.
+ inputType = getTensorFlowDataType(operand->getType(), inst->getLoc());
+ }
+
+ // Add enqueue to the local graph function, and the corresponding dequeue to
+ // the top level function, so that caller can dequeue tensors via SessionRun.
+ TF_Operation *queueOp;
+ {
+ auto opName = "fifo_queue_" + llvm::itostr(tensorId);
+ auto *desc =
+ TF_NewOperation(graphFn.getGraph(), "FIFOQueueV2", opName.c_str());
+ TF_SetDevice(desc, "/device:CPU:0");
+ TF_SetAttrInt(desc, "capacity", NAMED_TENSOR_QUEUE_CAPACITY);
+ TF_SetAttrTypeList(desc, "component_types", &inputType, 1);
+ TF_SetAttrString(desc, "shared_name", opName.data(), opName.size());
+ queueOp = graphFn.finishOp(desc, /*hasSideEffects*/ false,
+ /*isEligibleForTPU*/ false, status);
+ if (checkStatus(getUserSourceLocation(inst->getDebugLocation())))
+ return;
+ }
+
+ {
+ auto opName = "fifo_queue_enqueue_" + llvm::itostr(tensorId);
+ auto *desc =
+ TF_NewOperation(graphFn.getGraph(), "QueueEnqueueV2", opName.c_str());
+ TF_AddInput(desc, {queueOp, 0});
+ TF_AddInputList(desc, &inputOp, 1);
+ TF_SetDevice(desc, "/device:CPU:0");
+ TF_SetAttrTypeList(desc, "Tcomponents", &inputType, 1);
+
+ graphFn.finishOp(desc, /*hasSideEffects*/ true,
+ /*isEligibleForTPU*/ false, status);
+ if (checkStatus(getUserSourceLocation(inst->getDebugLocation())))
+ return;
+ }
+
+ // Now add dequeue to the top level graph function.
+ // Multiple graph functions can have an enqueue op over the same tensorId.
+ // One example is to enqueue tensors both within the while op's body
+ // function, and also right after the while op is executed.
+ // In that case, we only generate a single dequeue op at the top level.
+ if (!processedTensorIdsForSend.insert(tensorId).second) return;
+
+ // The code here is different enough from the above that it's not worth
+ // extracting common code into functions.
+ TF_Operation *globalQueueOp;
+ {
+ auto opName = "fifo_queue_" + llvm::itostr(tensorId);
+ auto *desc = TF_NewOperation(resultGraph, "FIFOQueueV2", opName.c_str());
+ TF_SetDevice(desc, "/device:CPU:0");
+ TF_SetAttrInt(desc, "capacity", NAMED_TENSOR_QUEUE_CAPACITY);
+ TF_SetAttrTypeList(desc, "component_types", &inputType, 1);
+ // FIXME: Revisit whether to populate "shared_name".
+ TF_SetAttrString(desc, "shared_name", opName.data(), opName.size());
+ globalQueueOp = TF_FinishOperation(desc, status);
+ if (checkStatus(getUserSourceLocation(inst->getDebugLocation())))
+ return;
+ }
+
+ {
+ auto opName = "fifo_queue_dequeue_" + llvm::itostr(tensorId);
+ auto *desc = TF_NewOperation(resultGraph, "QueueDequeueV2", opName.c_str());
+ TF_AddInput(desc, {globalQueueOp, 0});
+ TF_SetDevice(desc, "/device:CPU:0");
+ TF_SetAttrTypeList(desc, "component_types", &inputType, 1);
+ TF_FinishOperation(desc, status);
+ if (checkStatus(getUserSourceLocation(inst->getDebugLocation()))) return;
+ }
+ }
+

void TFGraphLowering::visitTFDataset(BuiltinInst *inst) {
// FIXME: Also support dataset/iterator outside of TPU context.
if (!configuration.isTPUEnabled || !configuration.isTPUInfeedEnabled) {
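For reference only (not part of this change), a minimal sketch of how a host-side caller might consume a sent tensor: once the lowered graph function has run its QueueEnqueueV2, the matching dequeue op in the top-level graph can be looked up by name and executed through TF_SessionRun. The session, status, and tensor id 0 below are illustrative assumptions.

// Illustrative sketch, assuming a TF_Session *session and TF_Status *status
// already exist for the top-level graph (resultGraph), and tensor id 0.
TF_Operation *dequeue =
    TF_GraphOperationByName(resultGraph, "fifo_queue_dequeue_0");
TF_Output fetch = {dequeue, 0};
TF_Tensor *received = nullptr;
TF_SessionRun(session, /*run_options*/ nullptr,
              /*inputs*/ nullptr, /*input_values*/ nullptr, 0,
              /*outputs*/ &fetch, &received, 1,
              /*target_opers*/ nullptr, 0,
              /*run_metadata*/ nullptr, status);
// 'received' now holds the tensor enqueued by the graph function; the caller
// owns it and should release it with TF_DeleteTensor() when done.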
@@ -1190,11 +1310,15 @@ void TFGraphLowering::visitReturnInst(ReturnInst *inst) {
for (auto &operand : ti->getAllOperands()) {
auto result = getOperandValue(operand.get());
if (!result.oper) return; // Error occurred.
+ result = graphFn.maybeRunEffectfulOp(result, status);
+ if (checkStatus(SILFn.getLocation())) return;
graphFn.outputs.push_back({/*SILArgument*/ nullptr, result});
}
} else {
auto result = getOperandValue(inst->getOperand());
if (!result.oper) return; // Error occurred.
+ result = graphFn.maybeRunEffectfulOp(result, status);
+ if (checkStatus(SILFn.getLocation())) return;
graphFn.outputs.push_back({/*SILArgument*/ nullptr, result});
}
}
@@ -1214,6 +1338,8 @@ void TFGraphLowering::visitBranchInst(BranchInst *inst) {
for (unsigned i = 0, e = inst->getNumArgs(); i != e; ++i) {
auto result = getOperandValue(inst->getArg(i));
if (!result.oper) return; // Error occurred.
+ result = graphFn.maybeRunEffectfulOp(result, status);
+ if (checkStatus(SILFn.getLocation())) return;
graphFn.outputs.push_back({destBB->getArgument(i), result});
}
}
@@ -1354,9 +1480,9 @@ void TFGraphLowering::lowerWhileLoopRegion(WhileLoopSESERegion *r) {
// body, we are required to emit the computation into both functions, and
// rely on XLA to CSE it where possible (which I suspect it doesn't do).
//
- // This will also be problematic when the condition is allowed to have
- // side effects (e.g. because of send and recv) because they cannot be
- // reissued in general.
+ // This will also be problematic when the condition is allowed to have side
+ // effects (e.g. because of send and recv) because they cannot be reissued
+ // in general.
//
// A better model for while loop is to change the condition to be a function
// "T -> (U, bool)" and have the loop body be "U -> T". This structure
@@ -1389,9 +1515,10 @@ void TFGraphLowering::lowerWhileLoopRegion(WhileLoopSESERegion *r) {
for (unsigned i = loopBodyFn.outputs.size(), e = loopBodyFn.inputs.size();
i != e; ++i) {
- loopBodyFn.outputs.push_back({
- /*SILArgument*/ nullptr, loopBodyFn.inputs[i].parameter
- });
+ auto result =
+ loopBodyFn.maybeRunEffectfulOp(loopBodyFn.inputs[i].parameter, status);
+ if (checkStatus(SILFn.getLocation())) return;
+ loopBodyFn.outputs.push_back({/*SILArgument*/ nullptr, result});
}

// Next, lower the condition function into a 'stop predicate' for the loop.
@@ -1412,6 +1539,7 @@ void TFGraphLowering::lowerWhileLoopRegion(WhileLoopSESERegion *r) {
// Lower any code in the header block, which may be used by the termination
// condition. It ends with a conditional branch which we handle manually.
+ graphFn.shouldLowerEffectfulOps = false;
lowerBasicBlock(r->getHeader(), /*skipTerminator:*/ true);
if (errorOccurred) return;