Commit f7bef90

Author: Anurag Dixit

refactor: Review comments incorporated

Signed-off-by: Anurag Dixit <[email protected]>
1 parent 5559627 commit f7bef90

File tree

10 files changed: +91 additions, -34 deletions


core/compiler.cpp

Lines changed: 23 additions & 3 deletions
@@ -27,13 +27,33 @@
 namespace trtorch {
 namespace core {

+static std::unordered_map<int, runtime::CudaDevice> cuda_device_list;
+
+void update_cuda_device_list(void) {
+  int num_devices = 0;
+  auto status = cudaGetDeviceCount(&num_devices);
+  TRTORCH_ASSERT((status == cudaSuccess), "Unable to read CUDA capable devices. Return status: " << status);
+  cudaDeviceProp device_prop;
+  for (int i = 0; i < num_devices; i++) {
+    TRTORCH_CHECK(
+        (cudaGetDeviceProperties(&device_prop, i) == cudaSuccess),
+        "Unable to read CUDA Device Properties for device id: " << i);
+    std::string device_name(device_prop.name);
+    runtime::CudaDevice device = {
+        i, device_prop.major, device_prop.minor, nvinfer1::DeviceType::kGPU, device_name.size(), device_name};
+    cuda_device_list[i] = device;
+  }
+}
+
 void AddEngineToGraph(
     torch::jit::script::Module mod,
     std::shared_ptr<torch::jit::Graph>& g,
     const std::string& serialized_engine,
     runtime::CudaDevice& device_info,
     std::string engine_id = "",
     bool fallback = false) {
+  // Scan and update the list of available CUDA devices
+  update_cuda_device_list();
   auto engine_ptr =
       c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name() + engine_id, serialized_engine, device_info);
   // Get required metadata about the engine out
@@ -277,13 +297,13 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
   return new_mod;
 }

-torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, CompileSpec cfg) {
+torch::jit::script::Module EmbedEngineInNewModule(
+    const std::string& engine,
+    trtorch::core::runtime::CudaDevice cuda_device) {
   std::ostringstream engine_id;
   engine_id << reinterpret_cast<const int*>(&engine);
   torch::jit::script::Module new_mod("tensorrt_engine_mod_" + engine_id.str());
   auto new_g = std::make_shared<torch::jit::Graph>();
-  auto device_spec = cfg.convert_info.engine_settings.device;
-  auto cuda_device = runtime::get_device_info(device_spec.gpu_id, device_spec.device_type);
   AddEngineToGraph(new_mod, new_g, engine, cuda_device);
   auto new_method = new_mod._ivalue()->compilation_unit()->create_function("forward", new_g);
   auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
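For orientation, here is a minimal sketch of how the refactored core-level entry point could now be called; the wrapper function and the serialized_engine argument are assumptions for illustration, not part of this commit:

// Hedged sketch: embedding a pre-built engine via the new core-level signature.
// Assumes serialized_engine holds a serialized TensorRT engine blob.
torch::jit::script::Module embed_on_gpu0(const std::string& serialized_engine) {
  // get_device_info() is declared in core/runtime/runtime.h
  auto cuda_device = trtorch::core::runtime::get_device_info(0, nvinfer1::DeviceType::kGPU);
  return trtorch::core::EmbedEngineInNewModule(serialized_engine, cuda_device);
}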

core/compiler.h

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::

 torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg);

-torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, CompileSpec cfg);
+torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device);

 void set_device(const int gpu_id);

core/runtime/TRTEngine.cpp

Lines changed: 2 additions & 2 deletions
@@ -31,9 +31,9 @@ TRTEngine::TRTEngine(std::vector<std::string> serialized_info)
       util::logging::get_logger().get_reportable_severity(),
       util::logging::get_logger().get_is_colored_output_on()) {
   std::string _name = "deserialized_trt";
-  std::string engine_info = serialized_info[EngineIdx];
+  std::string engine_info = serialized_info[ENGINE_IDX];

-  CudaDevice cuda_device = deserialize_device(serialized_info[DeviceIdx]);
+  CudaDevice cuda_device = deserialize_device(serialized_info[DEVICE_IDX]);
   new (this) TRTEngine(_name, engine_info, cuda_device);
 }

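The renamed constants index the two entries of the serialized_info vector; a short illustrative sketch follows (device_blob and engine_blob are placeholder strings, not real serialized output):

// Hedged sketch: the serialized_info layout addressed by the renamed constants.
std::string device_blob = "...";  // placeholder: serialized CudaDevice
std::string engine_blob = "...";  // placeholder: serialized engine bytes
std::vector<std::string> serialized_info(2);
serialized_info[DEVICE_IDX] = device_blob;
serialized_info[ENGINE_IDX] = engine_blob;
TRTEngine engine(serialized_info);  // dispatches to the constructor above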
core/runtime/register_trt_op.cpp

Lines changed: 17 additions & 20 deletions
@@ -10,6 +10,13 @@ namespace trtorch {
 namespace core {
 namespace runtime {

+// SM Compute capability <Compute Capability, Device Name> map
+const std::unordered_map<std::string, std::string>& get_dla_supported_SM() {
+  // Xavier SM Compute Capability
+  static std::unordered_map<std::string, std::string> dla_supported_SM = {{"7.2", "Xavier"}};
+  return dla_supported_SM;
+}
+
 // Checks if a context switch is required for the device ID
 bool is_switch_required(const CudaDevice& curr_device, const CudaDevice& conf_device) {
   // If SM capability is not the same as configured then switch
@@ -40,34 +47,23 @@ bool is_switch_required(const CudaDevice& curr_device, const CudaDevice& conf_de

 int select_cuda_device(const CudaDevice& conf_device) {
   int device_id = 0;
-  int num_devices = 0;
-  // SM Compute capability <major,minor> pair
-  std::unordered_map<std::string, std::string> dla_supported_SM;
+  auto dla_supported = get_dla_supported_SM();

-  // Xavier SM Compute Capability
-  dla_supported_SM.insert(std::make_pair("7.2", "Xavier"));
-  auto status = cudaGetDeviceCount(&num_devices);
-  TRTORCH_CHECK((status == cudaSuccess), "Unable to read CUDA capable devices. Return status: " << status);
-
-  cudaDeviceProp device_prop;
+  auto cuda_device_list = DeviceList::instance().get_devices();

-  for (int i = 0; i < num_devices; i++) {
-    TRTORCH_CHECK(
-        (cudaGetDeviceProperties(&device_prop, i) == cudaSuccess),
-        "Unable to read CUDA Device Properties for device id: " << i);
-    auto compute_cap = std::to_string(device_prop.major) + "." + std::to_string(device_prop.minor);
-    std::string device_name{device_prop.name};
+  for (auto device : cuda_device_list) {
+    auto compute_cap = std::to_string(device.second.major) + "." + std::to_string(device.second.minor);
     // In case of DLA select the DLA supported device ID
     if (conf_device.device_type == nvinfer1::DeviceType::kDLA) {
-      if (dla_supported_SM.find(compute_cap) != dla_supported_SM.end() &&
-          dla_supported_SM[compute_cap] == device_name) {
-        device_id = i;
+      if (dla_supported.find(compute_cap) != dla_supported.end() &&
+          dla_supported[compute_cap] == device.second.device_name) {
+        device_id = device.second.id;
         break;
       }
     } else if (conf_device.device_type == nvinfer1::DeviceType::kGPU) {
       auto conf_sm = std::to_string(conf_device.major) + "." + std::to_string(conf_device.minor);
-      if (compute_cap == conf_sm && device_name == conf_device.device_name) {
-        device_id = i;
+      if (compute_cap == conf_sm && device.second.device_name == conf_device.device_name) {
+        device_id = device.second.id;
         break;
       }
     } else {
@@ -83,6 +79,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr

   CudaDevice curr_device;
   get_cuda_device(curr_device);
+  LOG_DEBUG("Current Device ID: " << curr_device.id);

   if (is_switch_required(curr_device, compiled_engine->device_info)) {
     // Scan through available CUDA devices and set the CUDA device context correctly
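To make the DLA path concrete, here is a small sketch of the lookup select_cuda_device performs for kDLA requests; since the map currently only contains {"7.2", "Xavier"}, any other compute capability falls through to the default device id. The values below are placeholders:

// Hedged sketch: matching a DLA-capable device by compute capability.
auto dla_supported = get_dla_supported_SM(); // copied, so operator[] is usable
std::string compute_cap = "7.2";             // would be built from device major/minor
if (dla_supported.find(compute_cap) != dla_supported.end()) {
  // dla_supported[compute_cap] == "Xavier"; compared against CudaDevice::device_name
}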

core/runtime/runtime.h

Lines changed: 23 additions & 1 deletion
@@ -11,7 +11,7 @@ namespace runtime {

 using EngineID = int64_t;

-typedef enum { DeviceIdx = 0, EngineIdx } SerializedInfoIndex;
+typedef enum { DEVICE_IDX = 0, ENGINE_IDX } SerializedInfoIndex;

 struct CudaDevice {
   int64_t id; // CUDA device id
@@ -78,6 +78,28 @@ CudaDevice deserialize_device(std::string device_info);

 CudaDevice get_device_info(int64_t gpu_id, nvinfer1::DeviceType device_type);

+class DeviceList {
+  using DeviceMap = std::unordered_map<int, CudaDevice>;
+  DeviceMap device_list;
+  DeviceList() {}
+
+ public:
+  static DeviceList& instance() {
+    static DeviceList obj;
+    return obj;
+  }
+
+  void insert(int device_id, CudaDevice cuda_device) {
+    device_list[device_id] = cuda_device;
+  }
+  CudaDevice find(int device_id) {
+    return device_list[device_id];
+  }
+  DeviceMap get_devices() {
+    return device_list;
+  }
+};
+
 struct TRTEngine : torch::CustomClassHolder {
   // Each engine needs its own runtime object
   nvinfer1::IRuntime* rt;
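For reference, a minimal usage sketch of the new DeviceList singleton; the device values below are placeholders:

// Hedged sketch: populating and querying the DeviceList singleton.
using namespace trtorch::core::runtime;

std::string name = "SomeGPU"; // placeholder device name
CudaDevice dev = {0, 7, 5, nvinfer1::DeviceType::kGPU, name.size(), name};
DeviceList::instance().insert(0, dev);

CudaDevice found = DeviceList::instance().find(0);
for (const auto& entry : DeviceList::instance().get_devices()) {
  // entry.first is the device id, entry.second the cached CudaDevice record
}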

cpp/api/include/trtorch/trtorch.h

Lines changed: 2 additions & 2 deletions
@@ -517,15 +517,15 @@ TRTORCH_API std::string ConvertGraphToTRTEngine(
  * in a TorchScript module
  *
  * @param engine: std::string - Pre-built serialized TensorRT engine
- * @param info: trtorch::CompileSpec - Compilation settings
+ * @param device: CompileSpec::Device - Device information
  *
  * Takes a pre-built serialized TensorRT engine and embeds it in a TorchScript
  * module. Registers execution of the engine as the forward method of the module
  * Forward is defined as: forward(Tensor[]) -> Tensor[]
  *
  * @return: A new module targeting a TensorRT engine
  */
-TRTORCH_API torch::jit::Module EmbedEngineInNewModule(const std::string& engine, CompileSpec info);
+TRTORCH_API torch::jit::Module EmbedEngineInNewModule(const std::string& engine, CompileSpec::Device device);

 /**
  * @brief Set gpu device id
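A hedged sketch of the new public call pattern; it mirrors the test change at the end of this commit, and engine is assumed to hold a serialized TensorRT engine:

// Hedged sketch: embedding a pre-built engine through the public API.
// Assumes: std::string engine = <serialized TensorRT engine>;
trtorch::CompileSpec::Device device;
device.device_type = trtorch::CompileSpec::Device::DeviceType::kGPU;
device.gpu_id = 0; // target GPU id
auto trt_mod = trtorch::EmbedEngineInNewModule(engine, device);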

cpp/api/src/compile_spec.cpp

Lines changed: 13 additions & 0 deletions
@@ -74,6 +74,19 @@ std::vector<core::ir::InputRange> to_vec_internal_input_ranges(std::vector<Compi
   return internal;
 }

+core::runtime::CudaDevice to_internal_cuda_device(CompileSpec::Device device) {
+  auto device_type = nvinfer1::DeviceType::kGPU;
+  switch (device.device_type) {
+    case CompileSpec::Device::DeviceType::kDLA:
+      device_type = nvinfer1::DeviceType::kDLA;
+      break;
+    case CompileSpec::Device::DeviceType::kGPU:
+    default:
+      device_type = nvinfer1::DeviceType::kGPU;
+  }
+  return core::runtime::get_device_info(device.gpu_id, device_type);
+}
+
 core::CompileSpec to_internal_compile_spec(CompileSpec external) {
   core::CompileSpec internal(to_vec_internal_input_ranges(external.input_ranges));

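To illustrate the conversion, a short sketch of what to_internal_cuda_device yields for a DLA request; this is illustrative only, since the helper is internal to the cpp/api layer:

// Hedged sketch: a kDLA request maps to nvinfer1::DeviceType::kDLA
// combined with the queried properties of the given gpu_id.
trtorch::CompileSpec::Device d;
d.device_type = trtorch::CompileSpec::Device::DeviceType::kDLA;
d.gpu_id = 0;
auto internal = trtorch::to_internal_cuda_device(d);
// internal.device_type == nvinfer1::DeviceType::kDLA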
cpp/api/src/trtorch.cpp

Lines changed: 3 additions & 2 deletions
@@ -9,6 +9,7 @@ namespace trtorch {

 // Defined in compile_spec.cpp
 core::CompileSpec to_internal_compile_spec(CompileSpec external);
+core::runtime::CudaDevice to_internal_cuda_device(CompileSpec::Device device);

 bool CheckMethodOperatorSupport(const torch::jit::script::Module& module, std::string method_name) {
   return core::CheckMethodOperatorSupport(module, method_name);
@@ -31,8 +32,8 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module
   return core::CompileGraph(module, to_internal_compile_spec(info));
 }

-torch::jit::Module EmbedEngineInNewModule(const std::string& engine, CompileSpec info) {
-  return core::EmbedEngineInNewModule(engine, to_internal_compile_spec(info));
+torch::jit::Module EmbedEngineInNewModule(const std::string& engine, CompileSpec::Device device) {
+  return core::EmbedEngineInNewModule(engine, to_internal_cuda_device(device));
 }

 std::string get_build_info() {

py/trtorch/csrc/trtorch_py.cpp

Lines changed: 2 additions & 2 deletions
@@ -119,8 +119,8 @@ bool CheckMethodOperatorSupport(const torch::jit::Module& module, const std::str
   return core::CheckMethodOperatorSupport(module, method_name);
 }

-torch::jit::Module EmbedEngineInNewModule(const py::bytes& engine, CompileSpec& info) {
-  return core::EmbedEngineInNewModule(engine, info.toInternalCompileSpec());
+torch::jit::Module EmbedEngineInNewModule(const py::bytes& engine, core::runtime::CudaDevice& device) {
+  return core::EmbedEngineInNewModule(engine, device);
 }

 std::string get_build_info() {

tests/modules/test_modules_as_engines.cpp

Lines changed: 5 additions & 1 deletion
@@ -36,8 +36,12 @@ TEST_P(ModuleTests, ModuleToEngineToModuleIsClose) {
   }

   auto compile_spec = trtorch::CompileSpec({input_ranges});
+  int device_id = 0;
+  cudaGetDevice(&device_id);
+  compile_spec.device.device_type = trtorch::CompileSpec::Device::DeviceType::kGPU;
+  compile_spec.device.gpu_id = device_id;
   auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", input_ranges);
-  auto trt_mod = trtorch::EmbedEngineInNewModule(engine, compile_spec);
+  auto trt_mod = trtorch::EmbedEngineInNewModule(engine, compile_spec.device);

   torch::jit::IValue trt_results_ivalues = trtorch::tests::util::RunModuleForward(mod, inputs_ivalues);
   std::vector<at::Tensor> trt_results;
