Merge pull request #1004 from NVIDIA/support_multiple_delimiters

narendasan · web-flow · commit 8af0422434e3 · 2022-05-06T18:47:38.000-07:00
fix(//core/runtime): Support more delimiter variants
diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp
@@ -21,7 +21,7 @@ InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> va
 
   std::unordered_map<const torch::jit::Value*, core::ir::Input> a;
   for (size_t i = 0; i < vals.size(); i++) {
-    LOG_DEBUG("Paring " << i << ": " << vals[i]->debugName() << " : " << specs[i]);
+    LOG_DEBUG("Pairing " << i << ": " << vals[i]->debugName() << " : " << specs[i]);
     a.insert({vals[i], specs[i]});
   }
   return a;
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
@@ -53,13 +53,25 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe
   TORCHTRT_CHECK((cuda_engine.get() != nullptr), "Unable to deserialize the TensorRT engine");
 
   exec_ctx = make_trt(cuda_engine->createExecutionContext());
+  TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");
 
   uint64_t inputs = 0;
   uint64_t outputs = 0;
 
   for (int64_t x = 0; x < cuda_engine->getNbBindings(); x++) {
     std::string bind_name = cuda_engine->getBindingName(x);
-    std::string idx_s = bind_name.substr(bind_name.find("_") + 1);
+    LOG_DEBUG("Binding name: " << bind_name);
+    auto delim = bind_name.find(".");
+    if (delim == std::string::npos) {
+      delim = bind_name.find("_");
+      TORCHTRT_CHECK(
+          delim != std::string::npos,
+          "Unable to determine binding index for input "
+              << bind_name
+              << "\nEnsure module was compiled with Torch-TensorRT.ts or follows Torch-TensorRT Runtime conventions");
+    }
+
+    std::string idx_s = bind_name.substr(delim + 1);
     uint64_t idx = static_cast<uint64_t>(std::stoi(idx_s));
 
     if (cuda_engine->bindingIsInput(x)) {
@@ -71,6 +83,8 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe
     }
   }
   num_io = std::make_pair(inputs, outputs);
+
+  LOG_DEBUG(*this);
 }
 
 TRTEngine& TRTEngine::operator=(const TRTEngine& other) {
@@ -82,6 +96,34 @@ TRTEngine& TRTEngine::operator=(const TRTEngine& other) {
   return (*this);
 }
 
+// Builds a multi-line, human-readable summary of this engine: its name, one
+// entry (id, shape, dtype) per input binding, one entry per output binding, and
+// the device info.
+//
+// Each binding is addressed by its real engine binding index and classified via
+// bindingIsInput(), so the "Outputs" section reports the output bindings' own
+// shapes and dtypes rather than re-reading binding indices 0..N-1 (which are
+// the indices the "Inputs" section already walked). The printed `id` is the
+// ordinal of the binding within its group (inputs or outputs).
+//
+// NOTE(review): assumes exec_ctx is non-null; the constructor visible in this
+// change checks that right after creating the context.
+std::string TRTEngine::to_str() const {
+  const auto& engine = exec_ctx->getEngine();
+  const int32_t num_bindings = engine.getNbBindings();
+  std::stringstream ss;
+
+  // Appends one entry for every binding whose direction matches `want_inputs`.
+  auto append_bindings = [&](bool want_inputs) {
+    uint64_t id = 0;
+    for (int32_t b = 0; b < num_bindings; b++) {
+      if (engine.bindingIsInput(b) != want_inputs) {
+        continue;
+      }
+      ss << "    id: " << id++ << '\n';
+      ss << "      shape: " << exec_ctx->getBindingDimensions(b) << '\n';
+      ss << "      dtype: " << util::TRTDataTypeToScalarType(engine.getBindingDataType(b)) << '\n';
+    }
+  };
+
+  ss << "Torch-TensorRT TensorRT Engine:" << '\n';
+  ss << "  Name: " << name << '\n';
+  ss << "  Inputs: [" << '\n';
+  append_bindings(/*want_inputs=*/true);
+  ss << "  ]" << '\n';
+  ss << "  Outputs: [" << '\n';
+  append_bindings(/*want_inputs=*/false);
+  ss << "  ]" << '\n';
+  ss << "  Device: " << device_info << '\n';
+
+  return ss.str();
+}
+
+// Stream insertion for TRTEngine: writes the engine's to_str() summary to `os`
+// and returns that same stream so insertions can be chained.
+std::ostream& operator<<(std::ostream& os, const TRTEngine& engine) {
+  return os << engine.to_str();
+}
+
 // TODO: Implement a call method
 // c10::List<at::Tensor> TRTEngine::Run(c10::List<at::Tensor> inputs) {
 //     auto input_vec = inputs.vec();
@@ -96,6 +138,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
         .def(torch::init<std::vector<std::string>>())
         // TODO: .def("__call__", &TRTEngine::Run)
         // TODO: .def("run", &TRTEngine::Run)
+        .def("__str__", &TRTEngine::to_str)
         .def_pickle(
             [](const c10::intrusive_ptr<TRTEngine>& self) -> std::vector<std::string> {
               // Serialize TensorRT engine
diff --git a/core/runtime/runtime.h b/core/runtime/runtime.h
@@ -59,6 +59,8 @@ struct TRTEngine : torch::CustomClassHolder {
   TRTEngine(std::vector<std::string> serialized_info);
   TRTEngine(std::string mod_name, std::string serialized_engine, CudaDevice cuda_device);
   TRTEngine& operator=(const TRTEngine& other);
+  std::string to_str() const;
+  friend std::ostream& operator<<(std::ostream& os, const TRTEngine& engine);
   // TODO: Implement a call method
   // c10::List<at::Tensor> Run(c10::List<at::Tensor> inputs);
 };
diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h
@@ -739,7 +739,12 @@ TORCHTRT_API std::string convert_method_to_trt_engine(
  * module. Registers execution of the engine as the forward method of the module
  * Forward is defined as: forward(Tensor[]) -> Tensor[]
  *
- * @return: A new module trageting a TensorRT engine
+ * TensorRT bindings must have names with the following format:
+ * - [symbol].[index in input / output array]
+ * ex.
+ * - [x.0, x.1, x.2] -> [y.0]
+ *
+ * @return: A new module targeting a TensorRT engine
  */
 TORCHTRT_API torch::jit::Module embed_engine_in_new_module(const std::string& engine, Device device);
 } // namespace torchscript
diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py
@@ -207,6 +207,11 @@ def embed_engine_in_new_module(serialized_engine: bytes, device=Device._current_
 
         forward(Tensor[]) -> Tensor[]
 
+    TensorRT bindings must have names with the following format:
+      - [symbol].[index in input / output array]
+      ex.
+      - [x.0, x.1, x.2] -> [y.0]
+
     Module can be saved with the engine embedded using torch.jit.save and moved / loaded according to torch_tensorrt portability rules
 
     Arguments:
diff --git a/tests/util/run_graph_engine.cpp b/tests/util/run_graph_engine.cpp
@@ -21,7 +21,7 @@ std::vector<core::ir::Input> toInputs(std::vector<at::Tensor> ten) {
   for (auto i : ten) {
     a.push_back(core::ir::Input(core::util::toVec(i.sizes())));
   }
-  return std::move(a);
+  return a;
 }
 
 std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool dynamic_batch) {
@@ -49,7 +49,7 @@ std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool d
     }
   }
 
-  return std::move(a);
+  return a;
 }
 
 std::vector<at::Tensor> RunEngine(std::string& eng, std::vector<at::Tensor> inputs) {

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> va`
`21`	`21`
`22`	`22`	`std::unordered_map<const torch::jit::Value*, core::ir::Input> a;`
`23`	`23`	`for (size_t i = 0; i < vals.size(); i++) {`
`24`		`- LOG_DEBUG("Paring " << i << ": " << vals[i]->debugName() << " : " << specs[i]);`
	`24`	`+ LOG_DEBUG("Pairing " << i << ": " << vals[i]->debugName() << " : " << specs[i]);`
`25`	`25`	`a.insert({vals[i], specs[i]});`
`26`	`26`	`}`
`27`	`27`	`return a;`
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ std::vector<core::ir::Input> toInputs(std::vector<at::Tensor> ten) {`
`21`	`21`	`for (auto i : ten) {`
`22`	`22`	`a.push_back(core::ir::Input(core::util::toVec(i.sizes())));`
`23`	`23`	`}`
`24`		`- return std::move(a);`
	`24`	`+ return a;`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool dynamic_batch) {`
`@@ -49,7 +49,7 @@ std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool d`
`49`	`49`	`}`
`50`	`50`	`}`
`51`	`51`
`52`		`- return std::move(a);`
	`52`	`+ return a;`
`53`	`53`	`}`
`54`	`54`
`55`	`55`	`std::vector<at::Tensor> RunEngine(std::string& eng, std::vector<at::Tensor> inputs) {`