
WIP: (//core): Added device meta data serialization/deserialization implic… #175


Closed
14 changes: 10 additions & 4 deletions core/compiler.cpp
@@ -1,3 +1,4 @@
+#include <cuda_runtime.h>
 #include <iostream>
 #include <memory>
 #include <sstream>
@@ -46,8 +47,9 @@ c10::FunctionSchema GenerateGraphSchema(
 void AddEngineToGraph(
     torch::jit::script::Module mod,
     std::shared_ptr<torch::jit::Graph>& g,
-    std::string& serialized_engine) {
-  auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name(), serialized_engine);
+    std::string& engine,
+    runtime::CudaDevice& device_info) {
+  auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name(), engine, device_info);
   // Get required metadata about the engine out
   auto num_io = engine_ptr->num_io;
   auto name = engine_ptr->name;
@@ -157,12 +159,16 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
   // torch::jit::script::Module new_mod = mod.clone();
   torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
   std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
+
   for (const torch::jit::script::Method& method : mod.get_methods()) {
     // Don't convert hidden methods
     if (method.name().rfind("_", 0)) {
       auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
       auto new_g = std::make_shared<torch::jit::Graph>();
-      AddEngineToGraph(new_mod, new_g, engine);
+
+      auto device_spec = cfg.convert_info.engine_settings.device;
+      auto cuda_device = runtime::get_device_info(device_spec.gpu_id, device_spec.device_type);
+      AddEngineToGraph(new_mod, new_g, engine, cuda_device);
       auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
       auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
       new_mod.type()->addMethod(new_method);
@@ -174,7 +180,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
 }
 
 void set_device(const int gpu_id) {
-  TRTORCH_ASSERT(cudaSetDevice(gpu_id) == cudaSuccess, "Unable to set CUDA device: " << gpu_id);
+  TRTORCH_CHECK((cudaSetDevice(gpu_id) == cudaSuccess), "Unable to set CUDA device: " << gpu_id);
Collaborator comment on the line above: can we call runtime::set_cuda_device here just to centralize responsibility for device management in the runtime section? (A sketch of this suggestion follows this file's diff.)

 }
 
 } // namespace core
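
The collaborator comment above asks for device switching to go through the runtime rather than calling cudaSetDevice directly in core. A minimal sketch of that shape, assuming the runtime exposes a set_cuda_device(CudaDevice&) helper as the comment implies (the helper's exact name and signature are not shown in this diff; get_device_info is reused from the hunk above, with kGPU assumed for the device type):

// Sketch only, not part of this PR.
void set_device(const int gpu_id) {
  // Build the runtime's device descriptor for the requested GPU (device type assumed to be kGPU)
  auto device_info = runtime::get_device_info(gpu_id, nvinfer1::DeviceType::kGPU);
  // Let the runtime own the cudaSetDevice call and any related bookkeeping
  runtime::set_cuda_device(device_info);
}
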
79 changes: 57 additions & 22 deletions core/conversion/converters/impl/element_wise.cpp
@@ -68,6 +68,21 @@ nvinfer1::ILayer* add_elementwise(
   return ele;
 }
 
+nvinfer1::ITensor* clamp_util(
+    ConversionCtx* ctx,
+    const torch::jit::Node* n,
+    nvinfer1::ITensor* self,
+    float limit,
+    nvinfer1::ElementWiseOperation op_type,
+    std::string str) {
+  nvinfer1::ITensor* clamp_layer_out = self;
+  auto limitTensor = tensor_to_const(ctx, torch::tensor({limit}));
+  auto limit_layer = add_elementwise(ctx, op_type, clamp_layer_out, limitTensor, util::node_info(n) + str);
+  TRTORCH_CHECK(limit_layer, "Unable to create elementwise " << str << " layer for node: " << *n);
+  clamp_layer_out = limit_layer->getOutput(0);
+  return clamp_layer_out;
+}
+
 auto element_wise_registrations TRTORCH_UNUSED =
     RegisterNodeConversionPatterns()
         .pattern({"aten::add.Tensor(Tensor self, Tensor other, Scalar alpha=1) -> "
@@ -145,38 +160,58 @@ auto element_wise_registrations TRTORCH_UNUSED =
                     return true;
                   }})
         .pattern({"aten::clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> (Tensor)",
                   [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                     // Compute min(max(min_threshold, input), max_threshold)
                     auto self = args[0].ITensorOrFreeze(ctx);
                     auto clamp_layer_out = self;
+
+                    if (args[1].isIValue() && args[1].IValue()->isScalar() && args[2].isIValue() &&
+                        args[2].IValue()->isScalar()) {
+                      auto alpha = args[1].unwrapToScalar().to<float>();
+                      auto beta = args[2].unwrapToScalar().to<float>();
+                      auto clip_layer = ctx->net->addActivation(*self, nvinfer1::ActivationType::kCLIP);
+                      TRTORCH_CHECK(clip_layer, "Unable to create clip layer for node: " << *n);
+                      clip_layer->setAlpha(alpha);
+                      clip_layer->setBeta(beta);
+                      clamp_layer_out = clip_layer->getOutput(0);
+                    } else if (args[1].isIValue() && args[1].IValue()->isScalar()) {
+                      auto limit = args[1].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMAX, "_max");
+                    } else if (args[2].isIValue() && args[2].IValue()->isScalar()) {
+                      auto limit = args[2].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMIN, "_min");
+                    }
+
+                    auto out = ctx->AssociateValueAndTensor(n->outputs()[0], clamp_layer_out);
+                    LOG_DEBUG("Clamp layer output tensor shape: " << out->getDimensions());
+                    return true;
+                  }})
+        .pattern({"aten::clamp_min(Tensor self, Scalar min) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    // Compute min(max(min_threshold, input), max_threshold)
+                    auto self = args[0].ITensorOrFreeze(ctx);
+                    auto clamp_layer_out = self;
                     if (args[1].isIValue() && args[1].IValue()->isScalar()) {
-                      auto minScalar = args[1].unwrapToScalar().to<float>();
-                      auto minTensor = tensor_to_const(ctx, torch::tensor({minScalar}));
-                      auto max_layer = add_elementwise(
-                          ctx,
-                          nvinfer1::ElementWiseOperation::kMAX,
-                          clamp_layer_out,
-                          minTensor,
-                          util::node_info(n) + std::string("_max"));
-                      TRTORCH_CHECK(max_layer, "Unable to create elementwise max layer for node: " << *n);
-                      clamp_layer_out = max_layer->getOutput(0);
+                      auto limit = args[1].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMAX, "_max");
                     }
 
-                    if (args[2].isIValue() && args[2].IValue()->isScalar()) {
-                      auto maxScalar = args[2].unwrapToScalar().to<float>();
-                      auto maxTensor = tensor_to_const(ctx, torch::tensor({maxScalar}));
-                      auto min_layer = add_elementwise(
-                          ctx,
-                          nvinfer1::ElementWiseOperation::kMIN,
-                          clamp_layer_out,
-                          maxTensor,
-                          util::node_info(n) + std::string("_min"));
-                      TRTORCH_CHECK(min_layer, "Unable to create elementwise min layer for node: " << *n);
-                      clamp_layer_out = min_layer->getOutput(0);
+                    auto out = ctx->AssociateValueAndTensor(n->outputs()[0], clamp_layer_out);
+                    LOG_DEBUG("clamp_min layer output tensor shape: " << out->getDimensions());
+                    return true;
+                  }})
+        .pattern({"aten::clamp_max(Tensor self, Scalar max) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    // Compute min(max(min_threshold, input), max_threshold)
+                    auto self = args[0].ITensorOrFreeze(ctx);
+                    auto clamp_layer_out = self;
+                    if (args[1].isIValue() && args[1].IValue()->isScalar()) {
+                      auto limit = args[1].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMIN, "_min");
                     }
 
                     auto out = ctx->AssociateValueAndTensor(n->outputs()[0], clamp_layer_out);
-                    LOG_DEBUG("Clamp layer output tensor shape: " << out->getDimensions());
+                    LOG_DEBUG("clamp_max layer output tensor shape: " << out->getDimensions());
                     return true;
                   }})
         .pattern({"aten::sub.Tensor(Tensor self, Tensor other, Scalar alpha=1) -> "
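
For reference, all three clamp converters above implement the scalar-bound semantics spelled out in the in-code comment: clamp(x, min, max) = min(max(x, min), max), with clamp_min and clamp_max applying only one bound. A standalone sanity check of that arithmetic (plain C++, independent of TensorRT and not part of this PR):

#include <algorithm>
#include <cassert>

// Reference semantics for the clamp family converted above:
// min(max(min_threshold, input), max_threshold)
float clamp_ref(float x, float lo, float hi) {
  return std::min(std::max(x, lo), hi);
}

int main() {
  assert(clamp_ref(5.0f, 0.0f, 1.0f) == 1.0f);   // above the range -> clipped to max
  assert(clamp_ref(-3.0f, 0.0f, 1.0f) == 0.0f);  // below the range -> clipped to min
  assert(clamp_ref(0.5f, 0.0f, 1.0f) == 0.5f);   // inside the range -> unchanged
  return 0;
}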