Skip to content

feat(serde)!: Refactor CudaDevice struct, implement ABI versioning, serde cleanup #520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions core/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
auto temp_g = std::make_shared<torch::jit::Graph>();
auto device_spec = convert_cfg.engine_settings.device;
auto cuda_device = runtime::get_device_info(device_spec.gpu_id, device_spec.device_type);
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
AddEngineToGraph(new_mod, temp_g, engine, cuda_device, trt_engine_id.str(), true);

seg_block.update_graph(temp_g);
Expand Down Expand Up @@ -265,7 +265,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
auto new_g = std::make_shared<torch::jit::Graph>();
auto device_spec = cfg.convert_info.engine_settings.device;
auto cuda_device = runtime::get_device_info(device_spec.gpu_id, device_spec.device_type);
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
Expand All @@ -277,9 +277,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
return new_mod;
}

torch::jit::script::Module EmbedEngineInNewModule(
const std::string& engine,
trtorch::core::runtime::CudaDevice cuda_device) {
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device) {
std::ostringstream engine_id;
engine_id << reinterpret_cast<const int*>(&engine);
torch::jit::script::Module new_mod("tensorrt_engine_mod_" + engine_id.str());
Expand Down
3 changes: 3 additions & 0 deletions core/runtime/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ config_setting(
cc_library(
name = "runtime",
srcs = [
"CudaDevice.cpp",
"DeviceList.cpp",
"TRTEngine.cpp",
"register_trt_op.cpp",
"runtime.cpp"
],
hdrs = [
"runtime.h",
Expand Down
106 changes: 106 additions & 0 deletions core/runtime/CudaDevice.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "cuda_runtime.h"

#include "core/runtime/runtime.h"
#include "core/util/prelude.h"

namespace trtorch {
namespace core {
namespace runtime {

// Delimiter placed between fields of the serialized device-info string.
const std::string DEVICE_INFO_DELIM = "%";

// Field positions inside the delimited device-info string; DEVICE_NAME_IDX is
// last, so (DEVICE_NAME_IDX + 1) is the expected token count after splitting.
typedef enum { ID_IDX = 0, SM_MAJOR_IDX, SM_MINOR_IDX, DEVICE_TYPE_IDX, DEVICE_NAME_IDX } SerializedDeviceInfoIndex;

// Default: sentinel values (-1) with a GPU device type, used before real
// device info has been queried or deserialized.
CudaDevice::CudaDevice() : id{-1}, major{-1}, minor{-1}, device_type{nvinfer1::DeviceType::kGPU} {}

// Construct device info for a specific GPU by querying the CUDA runtime.
//
// gpu_id:      CUDA device ordinal to describe
// device_type: TensorRT device type to associate with this device (caller
//              provided, not queried)
CudaDevice::CudaDevice(int64_t gpu_id, nvinfer1::DeviceType device_type) {
  // (Fixed: removed an unused local `CudaDevice cuda_device;` that only
  // invoked the default constructor and was never read.)
  cudaDeviceProp device_prop;

  // Device ID
  this->id = gpu_id;

  // Query compute capability and name from the CUDA runtime.
  // NOTE(review): the cudaGetDeviceProperties return status is not checked;
  // on failure device_prop is unspecified — consider a TRTORCH_CHECK here.
  cudaGetDeviceProperties(&device_prop, gpu_id);

  // Compute capability major version
  this->major = device_prop.major;

  // Compute capability minor version
  this->minor = device_prop.minor;

  // Set Device name
  this->device_name = std::string(device_prop.name);

  // Set Device Type
  this->device_type = device_type;
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switched to a delimited string vs. a byte-indexed string; should be easier to parse

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It still requires parsing based on the positional encoding of the parameters during deserialization.

// NOTE: Serialization Format for Device Info:
// id%major%minor%(enum)device_type%device_name

// Rebuild device info from its serialized, '%'-delimited string form
// (the inverse of CudaDevice::serialize()).
//
// device_info: string of the form "id%major%minor%device_type%device_name"
//
// Assumes the device name itself never contains the delimiter character —
// TODO confirm against CUDA device naming.
CudaDevice::CudaDevice(std::string device_info) {
  LOG_DEBUG("Deserializing Device Info: " << device_info);

  // Split on the delimiter; the device name is the final token.
  // (Fixed: compare against std::string::npos with size_t instead of
  // relying on npos converting to -1 in an int64_t.)
  std::vector<std::string> tokens;
  size_t start = 0;
  size_t end = device_info.find(DEVICE_INFO_DELIM);

  while (end != std::string::npos) {
    tokens.push_back(device_info.substr(start, end - start));
    start = end + DEVICE_INFO_DELIM.size();
    end = device_info.find(DEVICE_INFO_DELIM, start);
  }
  tokens.push_back(device_info.substr(start));

  TRTORCH_CHECK(tokens.size() == DEVICE_NAME_IDX + 1, "Unable to deserialize program target device information");

  id = std::stoi(tokens[ID_IDX]);
  major = std::stoi(tokens[SM_MAJOR_IDX]);
  minor = std::stoi(tokens[SM_MINOR_IDX]);
  device_type = (nvinfer1::DeviceType)(std::stoi(tokens[DEVICE_TYPE_IDX]));
  device_name = tokens[DEVICE_NAME_IDX];

  LOG_DEBUG("Deserialized Device Info: " << *this);
}

std::string CudaDevice::serialize() {
std::vector<std::string> content;
content.resize(DEVICE_NAME_IDX + 1);

content[ID_IDX] = std::to_string(id);
content[SM_MAJOR_IDX] = std::to_string(major);
content[SM_MINOR_IDX] = std::to_string(minor);
content[DEVICE_TYPE_IDX] = std::to_string((int64_t)device_type);
content[DEVICE_NAME_IDX] = device_name;

std::stringstream ss;
for (size_t i = 0; i < content.size() - 1; i++) {
ss << content[i] << DEVICE_INFO_DELIM;
}
ss << content[DEVICE_NAME_IDX];

std::string serialized_device_info = ss.str();

LOG_DEBUG("Serialized Device Info: " << serialized_device_info);

return serialized_device_info;
}

// Returns the compute capability as "<major>.<minor>" (e.g. "7.5").
std::string CudaDevice::getSMCapability() const {
  return std::to_string(major) + "." + std::to_string(minor);
}

// Human-readable one-line summary of a device, e.g.:
//   Device(ID: 0, Name: ..., SM Capability: 7.5, Type: ...)
std::ostream& operator<<(std::ostream& os, const CudaDevice& device) {
  os << "Device(ID: " << device.id;
  os << ", Name: " << device.device_name;
  os << ", SM Capability: " << device.major << '.' << device.minor;
  os << ", Type: " << device.device_type << ')';
  return os;
}

} // namespace runtime
} // namespace core
} // namespace trtorch
45 changes: 45 additions & 0 deletions core/runtime/DeviceList.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include "cuda_runtime.h"

#include "core/runtime/runtime.h"
#include "core/util/prelude.h"

namespace trtorch {
namespace core {
namespace runtime {

// Enumerate every CUDA-capable device visible to the runtime and cache a
// CudaDevice record (as a kGPU device) for each ordinal.
DeviceList::DeviceList() {
  int device_count = 0;
  auto status = cudaGetDeviceCount(&device_count);
  TRTORCH_ASSERT((status == cudaSuccess), "Unable to read CUDA capable devices. Return status: " << status);

  for (int device_id = 0; device_id < device_count; device_id++) {
    device_list[device_id] = CudaDevice(device_id, nvinfer1::DeviceType::kGPU);
  }

  // REVIEW: DO WE CARE ABOUT DLA?

  LOG_DEBUG("Runtime:\n Available CUDA Devices: \n" << this->dump_list());
}

// Add (or overwrite) the cached record for device_id.
void DeviceList::insert(int device_id, CudaDevice cuda_device) {
  device_list[device_id] = cuda_device;
}

// Look up the cached record for device_id.
//
// Fixed: the previous implementation used operator[], which default-inserts a
// sentinel entry into the map as a side effect of a failed lookup. Using
// DeviceMap::find leaves the list untouched; a missing id still yields a
// default-constructed CudaDevice (sentinel values), matching the value
// operator[] would have produced.
CudaDevice DeviceList::find(int device_id) {
  auto record = device_list.find(device_id);
  if (record == device_list.end()) {
    return CudaDevice();
  }
  return record->second;
}

// Returns a copy of the full (device id -> CudaDevice) map.
DeviceList::DeviceMap DeviceList::get_devices() {
  return device_list;
}

// Render each known device on its own line (via operator<<) for logging.
std::string DeviceList::dump_list() {
  std::stringstream ss;
  for (const auto& entry : device_list) {
    ss << " " << entry.second << std::endl;
  }
  return ss.str();
}

} // namespace runtime
} // namespace core
} // namespace trtorch
126 changes: 9 additions & 117 deletions core/runtime/TRTEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace trtorch {
namespace core {
namespace runtime {

typedef enum { ABI_TARGET_IDX = 0, DEVICE_IDX, ENGINE_IDX } SerializedInfoIndex;

// Replace every '.' with '_' so the string can be used in identifier-like
// contexts (e.g. generated names).
std::string slugify(std::string s) {
  for (auto& ch : s) {
    if (ch == '.') {
      ch = '_';
    }
  }
  return s;
}
Expand All @@ -30,6 +32,12 @@ TRTEngine::TRTEngine(std::vector<std::string> serialized_info)
std::string("[] = "),
util::logging::get_logger().get_reportable_severity(),
util::logging::get_logger().get_is_colored_output_on()) {
TRTORCH_CHECK(
serialized_info.size() == ENGINE_IDX + 1, "Program to be deserialized targets an incompatible TRTorch ABI");
TRTORCH_CHECK(
serialized_info[ABI_TARGET_IDX] == ABI_VERSION,
"Program to be deserialized targets a different TRTorch ABI Version ("
<< serialized_info[ABI_TARGET_IDX] << ") than the TRTorch Runtime ABI (" << ABI_VERSION << ")");
std::string _name = "deserialized_trt";
std::string engine_info = serialized_info[ENGINE_IDX];

Expand Down Expand Up @@ -116,6 +124,7 @@ static auto TRTORCH_UNUSED TRTEngineTSRegistrtion =
auto trt_engine = std::string((const char*)serialized_trt_engine->data(), serialized_trt_engine->size());

std::vector<std::string> serialize_info;
serialize_info.push_back(ABI_VERSION);
serialize_info.push_back(serialize_device(self->device_info));
serialize_info.push_back(trt_engine);
return serialize_info;
Expand All @@ -124,123 +133,6 @@ static auto TRTORCH_UNUSED TRTEngineTSRegistrtion =
return c10::make_intrusive<TRTEngine>(std::move(seralized_info));
});
} // namespace
// Make cuda_device.id the active CUDA device for the calling thread;
// fails hard (TRTORCH_CHECK) if the runtime rejects the id.
void set_cuda_device(CudaDevice& cuda_device) {
  TRTORCH_CHECK((cudaSetDevice(cuda_device.id) == cudaSuccess), "Unable to set device: " << cuda_device.id);
}

// Overwrite cuda_device with the properties of the CUDA device that is
// currently active for the calling thread (id, SM version, name).
//
// cuda_device: [in/out] record to populate; its incoming id only appears in
//              the error message if the initial query fails.
void get_cuda_device(CudaDevice& cuda_device) {
  int device = 0;
  // Fixed: `device` is already an int, so its address can be passed directly;
  // the previous reinterpret_cast<int*>(&device) was a no-op cast.
  TRTORCH_CHECK(
      (cudaGetDevice(&device) == cudaSuccess),
      "Unable to get current device: " << cuda_device.id);
  cuda_device.id = static_cast<int64_t>(device);

  cudaDeviceProp device_prop;
  TRTORCH_CHECK(
      (cudaGetDeviceProperties(&device_prop, cuda_device.id) == cudaSuccess),
      "Unable to get CUDA properties from device:" << cuda_device.id);
  cuda_device.set_major(device_prop.major);
  cuda_device.set_minor(device_prop.minor);
  std::string device_name(device_prop.name);
  cuda_device.set_device_name(device_name);
}

// Serialize cuda_device into a flat byte string with the layout:
//   int64 id | int64 major | int64 minor | DeviceType | size_t name_len | name bytes
// deserialize_device() is the inverse.
//
// Fixes over the previous version:
//  - no leaked `new char[]` scratch buffer (builds a std::string directly)
//  - copies the device name's characters via data(), not the bytes of the
//    std::string object itself
//  - the returned string's length covers every field written, including the
//    size_t name-length field the old code wrote but did not count
std::string serialize_device(CudaDevice& cuda_device) {
  std::string buffer;
  auto append_bytes = [&buffer](const void* src, size_t len) { buffer.append(static_cast<const char*>(src), len); };

  int64_t temp = cuda_device.get_id();
  append_bytes(&temp, sizeof(int64_t));

  temp = cuda_device.get_major();
  append_bytes(&temp, sizeof(int64_t));

  temp = cuda_device.get_minor();
  append_bytes(&temp, sizeof(int64_t));

  auto device_type = cuda_device.get_device_type();
  append_bytes(&device_type, sizeof(nvinfer1::DeviceType));

  auto device_name = cuda_device.get_device_name();
  size_t device_name_len = device_name.size();
  append_bytes(&device_name_len, sizeof(size_t));
  append_bytes(device_name.data(), device_name_len);

  return buffer;
}

// Inverse of serialize_device(): rebuild a CudaDevice from the flat byte
// layout (3 x int64, DeviceType, size_t name length, name bytes).
//
// Fixes over the previous version:
//  - no leaked `new char[]` copy of the input (reads device_info in place)
//  - reads the name length from the serialized bytes instead of from the
//    address of the cursor pointer itself
//  - rebuilds the name with the std::string(char*, len) constructor rather
//    than memcpy-ing over a std::string object (undefined behavior)
//  - stores the deserialized device_type on the result, which the old code
//    read and then discarded
CudaDevice deserialize_device(std::string device_info) {
  CudaDevice ret;
  const char* cursor = device_info.data();
  auto read_bytes = [&cursor](void* dst, size_t len) {
    memcpy(dst, cursor, len);
    cursor += len;
  };

  int64_t temp = 0;
  read_bytes(&temp, sizeof(int64_t));
  ret.set_id(temp);

  read_bytes(&temp, sizeof(int64_t));
  ret.set_major(temp);

  read_bytes(&temp, sizeof(int64_t));
  ret.set_minor(temp);

  nvinfer1::DeviceType device_type;
  read_bytes(&device_type, sizeof(nvinfer1::DeviceType));
  ret.set_device_type(device_type);

  size_t size = 0;
  read_bytes(&size, sizeof(size_t));
  ret.set_device_name_len(size);

  std::string device_name(cursor, size);
  ret.set_device_name(device_name);

  return ret;
}

// Build a CudaDevice record for gpu_id by querying the CUDA runtime; the
// device type is supplied by the caller rather than queried.
CudaDevice get_device_info(int64_t gpu_id, nvinfer1::DeviceType device_type) {
  cudaDeviceProp device_prop;
  cudaGetDeviceProperties(&device_prop, gpu_id);

  std::string device_name(device_prop.name);

  CudaDevice cuda_device;
  cuda_device.set_id(gpu_id); // device ordinal
  cuda_device.set_major(device_prop.major); // compute capability major version
  cuda_device.set_minor(device_prop.minor); // compute capability minor version
  cuda_device.set_device_name(device_name);
  cuda_device.set_device_name_len(device_name.size()); // used by serialization/deserialization
  cuda_device.set_device_type(device_type);

  return cuda_device;
}

} // namespace runtime
} // namespace core
Expand Down
Loading