Commit 92e5ff8

Merge pull request #499 from NVIDIA/vit
fix: Fix linear lowering pass, lift layer_norm scale layer restriction and matmul layer nbdims restriction
2 parents 7daed40 + e4e4f8c · commit 92e5ff8

8 files changed (+156, −30 lines)

core/compiler.cpp

Lines changed: 4 additions & 4 deletions
@@ -182,8 +182,8 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
  torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
  std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
  for (const torch::jit::script::Method& method : mod.get_methods()) {
-    // Don't convert hidden methods
-    if (method.name().rfind("_", 0)) {
+    // Compile only forward methods. forward method contains the entire graph.
+    if (method.name().compare("forward") == 0) {
      auto new_g = std::make_shared<torch::jit::Graph>();
      auto graph_and_parameters = lowering::Lower(mod, method.name());

@@ -256,8 +256,8 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
  torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
  std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
  for (const torch::jit::script::Method& method : mod.get_methods()) {
-    // Don't convert hidden methods
-    if (method.name().rfind("_", 0)) {
+    // Compile only forward methods. forward method contains the entire graph.
+    if (method.name().compare("forward") == 0) {
      auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
      auto new_g = std::make_shared<torch::jit::Graph>();
      AddEngineToGraph(new_mod, new_g, engine);
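Note on the new filter: a scripted module's forward graph contains (and can inline) the calls to its other methods, so compiling only forward still covers the whole module, as the new comment states. A minimal PyTorch sketch of that assumption (the Toy module below is hypothetical, not part of this change):

    import torch

    class Toy(torch.nn.Module):
        def helper(self, x):
            return torch.relu(x)

        def forward(self, x):
            # helper appears inside forward's graph as a method call that is
            # inlined during lowering, so converting forward alone covers it.
            return self.helper(x) + 1

    scripted = torch.jit.script(Toy())
    print(scripted.graph)          # forward's graph, with a call to helper
    print(scripted.inlined_graph)  # the same graph with helper inlined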

core/conversion/converters/impl/layer_norm.cpp

Lines changed: 24 additions & 5 deletions
@@ -117,12 +117,31 @@ auto layer_norm_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns().
      }

      auto power = Weights(ctx, at::ones(expand_size));
-      auto scale_nd = ctx->net->addScaleNd(
-          *div_out, nvinfer1::ScaleMode::kELEMENTWISE, beta_weights.data, gamma_weights.data, power.data, 1);
-      scale_nd->setName((util::node_info(n) + "_scale_nd").c_str());
-      auto scale_nd_out = scale_nd->getOutput(0);

-      ctx->AssociateValueAndTensor(n->outputs()[0], scale_nd_out);
+      auto gamma_tensor = ctx->net->addConstant(gamma_weights.shape, gamma_weights.data)->getOutput(0);
+      auto scale_l = add_elementwise(
+          ctx, nvinfer1::ElementWiseOperation::kPROD, div_out, gamma_tensor, (util::node_info(n) + "_scale").c_str());
+
+      auto beta_tensor = ctx->net->addConstant(beta_weights.shape, beta_weights.data)->getOutput(0);
+      auto shift_l = add_elementwise(
+          ctx,
+          nvinfer1::ElementWiseOperation::kSUM,
+          scale_l->getOutput(0),
+          beta_tensor,
+          (util::node_info(n) + "_shift").c_str());
+
+      auto power_tensor = ctx->net->addConstant(power.shape, power.data)->getOutput(0);
+      auto power_l = add_elementwise(
+          ctx,
+          nvinfer1::ElementWiseOperation::kPOW,
+          shift_l->getOutput(0),
+          power_tensor,
+          (util::node_info(n) + "_power").c_str());
+
+      power_l->setName((util::node_info(n) + "_scale_nd").c_str());
+      auto power_l_out = power_l->getOutput(0);
+
+      ctx->AssociateValueAndTensor(n->outputs()[0], power_l_out);
      return true;
    }});
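The three elementwise layers simply spell out layer norm's affine step (multiply by gamma, add beta, then raise to a power of ones, which is a no-op) without the scale-layer rank restriction mentioned in the commit message. A rough PyTorch sketch of the equivalence, using the shapes from the new test added below (values here are illustrative, not the converter itself):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 197, 768)
    gamma, beta, eps = torch.randn(768), torch.randn(768), 1e-5

    normalized = (x - x.mean(-1, keepdim=True)) / torch.sqrt(x.var(-1, unbiased=False, keepdim=True) + eps)
    # PROD with gamma, SUM with beta, POW with ones: the same affine step as layer_norm.
    decomposed = ((normalized * gamma) + beta) ** torch.ones(768)

    print(torch.allclose(decomposed, F.layer_norm(x, [768], gamma, beta, eps), atol=1e-5))  # expect True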

core/conversion/converters/impl/matrix_multiply.cpp

Lines changed: 9 additions & 4 deletions
@@ -1,3 +1,4 @@
+#include "core/conversion/converters/converter_util.h"
#include "core/conversion/converters/converters.h"
#include "core/util/prelude.h"

@@ -13,10 +14,14 @@ auto mm_registrations TRTORCH_UNUSED =
        .pattern({"aten::matmul(Tensor self, Tensor other) -> (Tensor)",
                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                    auto self = args[0].ITensorOrFreeze(ctx);
-                   LOG_DEBUG("self tensor shape: " << self->getDimensions());
-
                    auto other = args[1].ITensorOrFreeze(ctx);
-                   LOG_DEBUG("other tensor shape: " << other->getDimensions());
+                   // Ensure self and other tensors have same nbDims by expanding the dimensions (from 0 axis) if
+                   // necessary.
+                   if (self->getDimensions().nbDims < other->getDimensions().nbDims) {
+                     self = addPadding(ctx, n, self, other->getDimensions().nbDims, false, false);
+                   } else {
+                     other = addPadding(ctx, n, other, self->getDimensions().nbDims, false, false);
+                   }

                    auto mm_layer = ctx->net->addMatrixMultiply(
                        *self, nvinfer1::MatrixOperation::kNONE, *other, nvinfer1::MatrixOperation::kNONE);

@@ -73,4 +78,4 @@ auto mm_registrations TRTORCH_UNUSED =
} // namespace converters
} // namespace conversion
} // namespace core
-} // namespace trtorch
+} // namespace trtorch
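The padding only prepends size-1 dimensions to the lower-rank operand, which matches matmul's broadcasting rules, so the product is unchanged. A small PyTorch sketch with the same ranks as the new test added below (the tensors are illustrative):

    import torch

    a = torch.randn(2, 3)     # rank 2, like in1 in the new test
    b = torch.randn(3, 3, 2)  # rank 3, like in2 in the new test

    # Prepending a 1-sized batch dim to the lower-rank operand (what addPadding
    # does here) leaves the broadcasted matmul result unchanged.
    print(torch.allclose(torch.matmul(a, b), torch.matmul(a.reshape(1, 2, 3), b)))  # expect True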

core/lowering/passes/linear_to_addmm.cpp

Lines changed: 41 additions & 16 deletions
@@ -1,23 +1,55 @@
-#include "torch/csrc/jit/passes/subgraph_rewrite.h"
+
+#include <torch/csrc/jit/runtime/operator.h>
+#include "torch/csrc/jit/ir/alias_analysis.h"
+#include "torch/csrc/jit/jit_log.h"
+#include "torch/csrc/jit/passes/constant_propagation.h"
+#include "torch/csrc/jit/passes/dead_code_elimination.h"
+#include "torch/csrc/jit/passes/guard_elimination.h"
+#include "torch/csrc/jit/passes/peephole.h"
+#include "torch/csrc/jit/runtime/graph_executor.h"

#include "core/util/prelude.h"
+#include "torch/csrc/jit/passes/subgraph_rewrite.h"

namespace trtorch {
namespace core {
namespace lowering {
namespace passes {

+void replaceLinearWithBiasNonePattern(std::shared_ptr<torch::jit::Graph> graph) {
+  // Define the decomposition function for aten::linear for the case where bias (mat2) is None.
+  static torch::jit::CompilationUnit decompose_funcs(R"SCRIPT(
+    def linear(self: Tensor, mat1: Tensor, mat2: Tensor):
+        return torch.matmul(self, mat1.t())
+    )SCRIPT");
+
+  // Iterate through nodes and search for aten::linear nodes where bias is not a Tensor (includes bias=None case)
+  auto block = graph->block();
+  for (auto it = block->nodes().begin(); it != block->nodes().end(); it++) {
+    auto n = *it;
+    if (n->kind().toQualString() == std::string("aten::linear")) {
+      auto input_values = n->inputs();
+      // input_values[2] is the bias. If none, replace it with the decomposed linear graph.
+      if (input_values[2]->type()->isSubtypeOf(c10::TensorType::get())) {
+        continue;
+      } else {
+        torch::jit::WithInsertPoint guard(*it);
+        std::shared_ptr<torch::jit::Graph> d_graph = decompose_funcs.get_function("linear").graph();
+        torch::jit::Value* new_output = insertGraph(*it->owningGraph(), *d_graph, it->inputs()).at(0);
+        new_output->setType(it->output()->type());
+        it->output()->replaceAllUsesWith(new_output);
+        it.destroyCurrent();
+      }
+    }
+  }
+}
+
void LinearToAddMM(std::shared_ptr<torch::jit::Graph>& graph) {
  // TensorRT implicitly adds a flatten layer infront of FC layers if necessary
  std::string flatten_linear_pattern = R"IR(
    graph(%input, %weight, %bias):
      %res = aten::linear(%input, %weight, %bias)
      return (%res))IR";
-  std::string flatten_linear_bias_none_pattern = R"IR(
-    graph(%input, %weight):
-      %bias: Tensor? = prim::Constant()
-      %res = aten::linear(%input, %weight, %bias)
-      return (%res))IR";

  std::string fused_linear = R"IR(
    graph(%input, %weight_t, %bias):

@@ -27,20 +59,13 @@ void LinearToAddMM(std::shared_ptr<torch::jit::Graph>& graph) {
      %b_f: Tensor = trt::const(%bias)
      %out: Tensor = aten::add(%b_f, %mm, %1)
      return (%out))IR";
-  std::string fused_linear_bias_none = R"IR(
-    graph(%input, %weight_t):
-      %weight = aten::t(%weight_t)
-      %mm: Tensor = aten::matmul(%input, %weight)
-      return (%mm))IR";
+
+  // First find and replace aten::linear nodes with non-tensor bias values.
+  replaceLinearWithBiasNonePattern(graph);

  torch::jit::SubgraphRewriter flatten_linear_to_linear;
  flatten_linear_to_linear.RegisterRewritePattern(flatten_linear_pattern, fused_linear);
  flatten_linear_to_linear.runOnGraph(graph);
-
-  torch::jit::SubgraphRewriter flatten_linear_bias_none_to_linear;
-  flatten_linear_bias_none_to_linear.RegisterRewritePattern(flatten_linear_bias_none_pattern, fused_linear_bias_none);
-  flatten_linear_bias_none_to_linear.runOnGraph(graph);
-  LOG_GRAPH("Post linear to addmm: " << *graph);
}

} // namespace passes
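The TorchScript decomposition registered above relies on the identity linear(x, W, None) == matmul(x, W.t()); the old SubgraphRewriter pattern could miss the bias=None case, which appears to be why the pass now walks the graph directly. A quick numeric check of that identity in PyTorch (shapes are arbitrary):

    import torch

    x = torch.randn(4, 16)
    w = torch.randn(8, 16)

    # aten::linear with bias=None is exactly matmul with the transposed weight.
    print(torch.allclose(torch.nn.functional.linear(x, w, None), torch.matmul(x, w.t())))  # expect True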

tests/core/conversion/converters/test_layer_norm.cpp

Lines changed: 28 additions & 0 deletions
@@ -118,3 +118,31 @@ TEST(Converters, ATenLayerNormConvertsCorrectlyLast1Dims) {

  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
}
+
+TEST(Converters, ATenLayerNormConvertsCorrectly3dInput1dNormalizedShape) {
+  const auto graph = R"IR(
+    graph(%0 : Tensor,
+          %gamma: Float(197, 768),
+          %beta: Float(197, 768)):
+      %1: int = prim::Constant[value=768]()
+      %4 : int[] = prim::ListConstruct(%1)
+      %7 : bool = prim::Constant[value=0]()
+      %8 : float = prim::Constant[value=1.0000000000000001e-05]()
+      %9 : Tensor = aten::layer_norm(%0, %4, %gamma, %beta, %8, %7)
+      return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {1, 197, 768}, {at::kCUDA});
+  auto gamma = at::randint(1, 10, {768}, {at::kCUDA});
+  auto beta = at::randint(1, 10, {768}, {at::kCUDA});
+
+  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {gamma, beta});
+  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in});
+
+  params = trtorch::core::conversion::get_named_params(g->inputs(), {gamma, beta});
+  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in});
+
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
+}

tests/core/conversion/converters/test_matrix_multiply.cpp

Lines changed: 21 additions & 0 deletions
@@ -26,6 +26,27 @@ TEST(Converters, ATenMMConvertsCorrectly) {
  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}

+TEST(Converters, ATenMMWithDiffShapesConvertsCorrectly) {
+  const auto graph = R"IR(
+    graph(%0 : Tensor, %1 : Tensor):
+      %2 : Tensor = aten::matmul(%0, %1)
+      return (%2))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(0, 5, {2, 3}, {at::kCUDA});
+  auto in2 = at::randint(0, 5, {3, 3, 2}, {at::kCUDA});
+  auto params = trtorch::core::conversion::get_named_params(g->inputs(), {});
+  auto jit_results = trtorch::tests::util::RunGraph(g, params, {in1, in2});
+
+  params = trtorch::core::conversion::get_named_params(g->inputs(), {});
+  auto trt_results = trtorch::tests::util::RunGraphEngine(g, params, {in1, in2});
+  auto trt = trt_results[0].reshape_as(jit_results[0]);
+
+  ASSERT_TRUE(trtorch::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
TEST(Converters, ATenBMMConvertsCorrectly) {
  const auto graph = R"IR(
    graph(%0 : Tensor, %1 : Tensor):

tests/core/lowering/test_linear_to_addmm.cpp

Lines changed: 24 additions & 1 deletion
@@ -31,4 +31,27 @@ TEST(LoweringPasses, LinearToAddMM) {
  torch::jit::parseIR(target_graph, &*tg);

  ASSERT_TRUE(!torch::jit::findPatternMatches(*tg, *sg).empty());
-}
+}
+
+TEST(LoweringPasses, LinearToAddMMBiasNone) {
+  std::string source_graph = R"IR(
+    graph(%input, %weight):
+      %bias : None = prim::Constant()
+      %res = aten::linear(%input, %weight, %bias)
+      return (%res))IR";
+  std::string target_graph = R"IR(
+    graph(%input, %weight_t):
+      %weight = aten::t(%weight_t)
+      %mm: Tensor = aten::matmul(%input, %weight)
+      return (%mm))IR";
+
+  trtorch::core::util::logging::get_logger().set_reportable_log_level(trtorch::core::util::logging::LogLevel::kGRAPH);
+  auto sg = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(source_graph, &*sg);
+  trtorch::core::lowering::passes::LinearToAddMM(sg);
+
+  auto tg = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(target_graph, &*tg);
+
+  ASSERT_TRUE(!torch::jit::findPatternMatches(*tg, *sg).empty());
+}

tests/modules/hub.py

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,7 @@
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
+import timm

models = {
    "alexnet": {
@@ -64,6 +65,10 @@
    "faster_rcnn": {
        "model": models.detection.fasterrcnn_resnet50_fpn(pretrained=True),
        "path": "script"
+    },
+    "vit": {
+        "model": timm.create_model('efficientnet_b0', pretrained=True),
+        "path": "script"
    }
}
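A minimal, hypothetical standalone sketch of exporting the newly registered model the way its "path": "script" entry implies (the output file name is illustrative; scripting assumes timm is installed and the model is TorchScript-compatible):

    import torch
    import timm

    # Mirrors the new "vit" table entry: build the timm model, script it, and
    # save the TorchScript module for the conversion tests to load.
    model = timm.create_model('efficientnet_b0', pretrained=True).eval()
    scripted = torch.jit.script(model)
    torch.jit.save(scripted, "vit_scripted.jit.pt")  # illustrative file name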
