
Commit 11af59a

Merge pull request #971 from NVIDIA/torch_tensorrt_1.1.0
Torch-TensorRT 1.1.0
2 parents: c395c21 + 365dabe

File tree: 10 files changed (+51 −57 lines)

.bazelversion

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-4.2.1
+5.1.1

core/lowering/passes/linear_to_addmm.cpp

Lines changed: 5 additions & 4 deletions
@@ -1,15 +1,15 @@

 #include <torch/csrc/jit/runtime/operator.h>
+#include "core/util/prelude.h"
+#include "torch/csrc/jit/api/function_impl.h"
 #include "torch/csrc/jit/ir/alias_analysis.h"
 #include "torch/csrc/jit/jit_log.h"
 #include "torch/csrc/jit/passes/constant_propagation.h"
 #include "torch/csrc/jit/passes/dead_code_elimination.h"
 #include "torch/csrc/jit/passes/guard_elimination.h"
 #include "torch/csrc/jit/passes/peephole.h"
-#include "torch/csrc/jit/runtime/graph_executor.h"
-#include "torch/csrc/jit/api/function_impl.h"
 #include "torch/csrc/jit/passes/subgraph_rewrite.h"
-#include "core/util/prelude.h"
+#include "torch/csrc/jit/runtime/graph_executor.h"

 namespace torch_tensorrt {
 namespace core {

@@ -34,7 +34,8 @@ void replaceLinearWithBiasNonePattern(std::shared_ptr<torch::jit::Graph> graph)
       continue;
     } else {
       torch::jit::WithInsertPoint guard(*it);
-      std::shared_ptr<torch::jit::Graph> d_graph = toGraphFunction(decompose_funcs.get_function("linear")).graph();;
+      std::shared_ptr<torch::jit::Graph> d_graph = toGraphFunction(decompose_funcs.get_function("linear")).graph();
+      ;
       torch::jit::Value* new_output = insertGraph(*it->owningGraph(), *d_graph, it->inputs()).at(0);
       new_output->setType(it->output()->type());
       it->output()->replaceAllUsesWith(new_output);
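
Note: this hunk is a clang-format reflow; the stray second `;` after graph() becomes an empty statement on its own line rather than being deleted. For context, the pass pulls the TorchScript decomposition graph for `linear` and splices it in at the call site with `insertGraph`, which copies the callee's body into the owning graph at the current insert point and returns the new outputs. Below is a minimal standalone sketch of that splice; the `aten::matmul` callee and the function name are illustrative assumptions, not the real `linear` decomposition.

// A minimal sketch (assumed names, illustrative callee) of splicing one JIT
// graph into another with torch::jit::insertGraph, as the pass does above.
#include <memory>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/ir/irparser.h>

std::shared_ptr<torch::jit::Graph> splice_example() {
  // Callee standing in for the decomposition graph obtained via
  // toGraphFunction(...).graph(); the real one decomposes aten::linear.
  auto callee = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(R"IR(
    graph(%x : Tensor, %w : Tensor):
      %out : Tensor = aten::matmul(%x, %w)
      return (%out))IR",
      callee.get());

  auto caller = std::make_shared<torch::jit::Graph>();
  auto* x = caller->addInput("x");
  auto* w = caller->addInput("w");

  // Insert the callee's body before the caller's return node; insertGraph
  // returns the Values corresponding to the callee's outputs.
  torch::jit::WithInsertPoint guard(caller->return_node());
  auto outs = torch::jit::insertGraph(*caller, *callee, {x, w});
  caller->registerOutput(outs.at(0));
  return caller;
}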

core/lowering/passes/reduce_gelu.cpp

Lines changed: 4 additions & 3 deletions
@@ -12,8 +12,8 @@ void ReduceGelu(std::shared_ptr<torch::jit::Graph>& graph) {
       %out : Tensor = aten::gelu(%x)
       return (%out))IR";

-  // This gelu_approximate_pattern schema exists in 21.11, 21.12, 22.01 containers of pytorch. These container versions use
-  // an unmerged PR in pytorch : https://github.com/pytorch/pytorch/pull/61439. We reduce this to regular Gelu.
+  // This gelu_approximate_pattern schema exists in 21.11, 21.12, 22.01 containers of pytorch. These container versions
+  // use an unmerged PR in pytorch : https://github.com/pytorch/pytorch/pull/61439. We reduce this to regular Gelu.
   std::string gelu_approximate_pattern = R"IR(
     graph(%x : Tensor, %approx):
       %out : Tensor = aten::gelu(%x, %approx)

@@ -64,7 +64,8 @@ void ReduceGelu(std::shared_ptr<torch::jit::Graph>& graph) {
   map_gelu_to_pointwise_ops.runOnGraph(graph);

   torch::jit::SubgraphRewriter map_gelu_approximate_to_pointwise_ops;
-  map_gelu_approximate_to_pointwise_ops.RegisterRewritePattern(gelu_approximate_pattern, gelu_reduce_multi_input_pattern);
+  map_gelu_approximate_to_pointwise_ops.RegisterRewritePattern(
+      gelu_approximate_pattern, gelu_reduce_multi_input_pattern);
   map_gelu_approximate_to_pointwise_ops.runOnGraph(graph);

   LOG_GRAPH("Post lowering of [aten::gelu] -> " << *graph);
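
For context: the approximate GELU being reduced here is the tanh variant, roughly 0.5·x·(1 + tanh(√(2/π)·(x + 0.044715·x³))), and the rewrite registered above is string-to-string: `SubgraphRewriter` pattern-matches one IR snippet and substitutes another. A minimal sketch of that flow follows, with a made-up pattern pair (relu rewritten as clamp_min) standing in for the GELU patterns in this file.

// Minimal SubgraphRewriter sketch; the pattern pair is illustrative and is
// not the gelu_approximate_pattern registered by ReduceGelu above.
#include <string>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/passes/subgraph_rewrite.h>

void rewrite_relu_as_clamp(std::shared_ptr<torch::jit::Graph>& graph) {
  std::string pattern = R"IR(
    graph(%x):
      %out = aten::relu(%x)
      return (%out))IR";
  std::string replacement = R"IR(
    graph(%x):
      %zero : int = prim::Constant[value=0]()
      %out = aten::clamp_min(%x, %zero)
      return (%out))IR";

  torch::jit::SubgraphRewriter rewriter;
  rewriter.RegisterRewritePattern(pattern, replacement);
  rewriter.runOnGraph(graph); // rewrites every match in place
}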

core/partitioning/partitioning.cpp

Lines changed: 15 additions & 11 deletions
@@ -231,11 +231,11 @@ std::unordered_map<torch::jit::Value*, usage_info> getInputUsageCounts(
   return usage_counts;
 }

-std::unordered_map<size_t, std::list<SegmentedBlock>::iterator>
-getIdxtoIterMap(std::list<SegmentedBlock> &segmented_blocks_list) {
+std::unordered_map<size_t, std::list<SegmentedBlock>::iterator> getIdxtoIterMap(
+    std::list<SegmentedBlock>& segmented_blocks_list) {
   std::unordered_map<size_t, std::list<SegmentedBlock>::iterator> idx_to_iter;
   auto iter = segmented_blocks_list.begin();
-  for (int i = 0; i < segmented_blocks_list.size(); ++i, ++iter) {
+  for (uint64_t i = 0; i < segmented_blocks_list.size(); ++i, ++iter) {
     idx_to_iter[i] = iter;
   }
   return idx_to_iter;

@@ -283,22 +283,24 @@ void resolveNonTensorInputBlocks(PartitionedGraph& segmented_blocks) {
 }

 void resolveTensorListInputBlocks(PartitionedGraph& segmented_blocks) {
-  // usage_counts is a map with key as non-tensor/tensorlist inputs and value as the idx of segmented block which produces/contains it.
-  auto usage_counts = getInputUsageCounts(
-      segmented_blocks, [](torch::jit::Value* input) -> bool { return isTensorList(input); });
+  // usage_counts is a map with key as non-tensor/tensorlist inputs and value as the idx of segmented block which
+  // produces/contains it.
+  auto usage_counts =
+      getInputUsageCounts(segmented_blocks, [](torch::jit::Value* input) -> bool { return isTensorList(input); });

   // Get idx of the segblock to its iterator mapping
   std::list<SegmentedBlock> segmented_blocks_list(segmented_blocks.cbegin(), segmented_blocks.cend());
   auto idx_to_iter = getIdxtoIterMap(segmented_blocks_list);

   std::unordered_set<int> updated_segments;
   // we need to re-segment TensorRT segments whose inputs are TensorLists
-  for (auto &use : usage_counts) {
+  for (auto& use : usage_counts) {
     auto use_info = use.second;
     // For a particular tensorlist input, traverse through all ids of segmented blocks whose target is TensorRT
     for (auto i : use_info.tensorrt_use_id) {
       if (!updated_segments.count(i)) {
-        // tensorlistinput_to_segblock is a mapping from {tensorlist input : segmented block which produced this tensorlist input}
+        // tensorlistinput_to_segblock is a mapping from {tensorlist input : segmented block which produced this
+        // tensorlist input}
         std::unordered_map<torch::jit::Value*, SegmentedBlock> tensorlistinput_to_segblock;
         for (auto input : segmented_blocks[i].raw_inputs()) {
           if (isTensorList(input)) {

@@ -308,18 +310,20 @@ void resolveTensorListInputBlocks(PartitionedGraph& segmented_blocks) {

         // For each tensorlist input in tensorlistinput_to_segblock, get the node which actually uses this input.
         // Once we retrieve the node, we remove it from the current TensorRT segmented_blocks[i]. This node should be
-        // added to block that generated/produced (can be obtained via produce_id) this tensorlist input in the first place.
+        // added to block that generated/produced (can be obtained via produce_id) this tensorlist input in the first
+        // place.
         auto seg_blocks = segmentBlocksWithTensorListInputs(segmented_blocks[i], tensorlistinput_to_segblock);
         auto append_blocks = seg_blocks.first;
         auto trt_block = seg_blocks.second;
-        // Remove the current TensorRT seg_block and replace it with new TRT block (non empty) which has the node that uses tensorlist input removed.
+        // Remove the current TensorRT seg_block and replace it with new TRT block (non empty) which has the node that
+        // uses tensorlist input removed.
         auto next_iter = segmented_blocks_list.erase(idx_to_iter[i]);
         if (trt_block.raw_nodes().size() > 0) {
           segmented_blocks_list.insert(next_iter, trt_block);
         }

         // append blocks' nodes to the producer seg_block
-        for (auto append_block: append_blocks) {
+        for (auto append_block : append_blocks) {
           auto input = append_block.first; // corresponds to the tensorlist input
           auto block = append_block.second;
           // append nodes to segmented_blocks_list
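
Most of this file's hunks are clang-format reflows; the substantive one changes the loop index in getIdxtoIterMap from `int` to `uint64_t`, since `std::list::size()` returns an unsigned `size_t` and the signed comparison trips `-Wsign-compare`. A minimal sketch of the pattern (illustrative code, not from the project):

// Illustrative sketch of the signed/unsigned loop-index fix; std::list has
// no operator[], which is also why the pass pairs an index with an iterator.
#include <cstdint>
#include <list>

void walk_blocks(std::list<int>& blocks) {
  // Before: for (int i = 0; i < blocks.size(); ++i)  // warns: -Wsign-compare
  auto iter = blocks.begin();
  for (uint64_t i = 0; i < blocks.size(); ++i, ++iter) {
    // i and iter advance together, mirroring getIdxtoIterMap above
  }
}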

core/partitioning/shape_analysis.cpp

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
-#include <ATen/ATen.h>
 #include "core/partitioning/shape_analysis.h"
+#include <ATen/ATen.h>
 #include "core/util/prelude.h"
 #include "torch/csrc/jit/api/module.h"
 #include "torch/csrc/jit/passes/constant_pooling.h"

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ RUN rm -rf /opt/pytorch/torch_tensorrt /usr/bin/bazel

 ARG ARCH="x86_64"
 ARG TARGETARCH="amd64"
-ARG BAZEL_VERSION=4.2.1
+ARG BAZEL_VERSION=5.1.1

 RUN [[ "$TARGETARCH" == "amd64" ]] && ARCH="x86_64" || ARCH="${TARGETARCH}" \
     && wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel \

docker/Dockerfile.docs

Lines changed: 2 additions & 2 deletions
@@ -3,8 +3,8 @@ FROM nvcr.io/nvidia/tensorrt:22.01-py3
 RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add -
 RUN echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list

-RUN apt-get update && apt-get install -y bazel-4.2.1 clang-format-9 libjpeg9 libjpeg9-dev
-RUN ln -s /usr/bin/bazel-4.2.1 /usr/bin/bazel
+RUN apt-get update && apt-get install -y bazel-5.1.1 clang-format-9 libjpeg9 libjpeg9-dev
+RUN ln -s /usr/bin/bazel-5.1.1 /usr/bin/bazel
 RUN ln -s $(which clang-format-9) /usr/bin/clang-format

 # Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container

docker/WORKSPACE.docs

Lines changed: 20 additions & 32 deletions
@@ -3,33 +3,32 @@ workspace(name = "Torch-TensorRT")
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")

-git_repository(
+http_archive(
     name = "rules_python",
-    remote = "https://github.com/bazelbuild/rules_python.git",
-    commit = "4fcc24fd8a850bdab2ef2e078b1de337eea751a6",
-    shallow_since = "1589292086 -0400"
+    sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f",
+    url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz",
 )

-load("@rules_python//python:repositories.bzl", "py_repositories")
-py_repositories()
-
-load("@rules_python//python:pip.bzl", "pip_repositories", "pip3_import")
-pip_repositories()
+load("@rules_python//python:pip.bzl", "pip_install")

 http_archive(
     name = "rules_pkg",
-    url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.2.4/rules_pkg-0.2.4.tar.gz",
-    sha256 = "4ba8f4ab0ff85f2484287ab06c0d871dcb31cc54d439457d28fd4ae14b18450a",
+    sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
+        "https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
+    ],
 )

 load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
+
 rules_pkg_dependencies()

 git_repository(
     name = "googletest",
-    remote = "https://github.com/google/googletest",
     commit = "703bd9caab50b139428cea1aaff9974ebee5742e",
-    shallow_since = "1570114335 -0400"
+    remote = "https://github.com/google/googletest",
+    shallow_since = "1570114335 -0400",
 )

 # CUDA should be installed on the system locally

@@ -52,17 +51,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "190e963e739d5f7c2dcf94b3994de8fcd335706a4ebb333812ea7d8c841beb06",
+    sha256 = "8d9e829ce9478db4f35bdb7943308cf02e8a2f58cf9bb10f742462c1d57bf287",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.10.0%2Bcu113.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.11.0%2Bcu113.zip"],
 )

 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "0996a6a4ea8bbc1137b4fb0476eeca25b5efd8ed38955218dec1b73929090053",
+    sha256 = "90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.10.0%2Bcu113.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip"],
 )

 ####################################################################################

@@ -84,18 +83,7 @@ new_local_repository(
 #########################################################################
 # Testing Dependencies (optional - comment out on aarch64)
 #########################################################################
-pip3_import(
-    name = "torch_tensorrt_py_deps",
-    requirements = "//py:requirements.txt"
-)
-
-load("@torch_tensorrt_py_deps//:requirements.bzl", "pip_install")
-pip_install()
-
-pip3_import(
-    name = "py_test_deps",
-    requirements = "//tests/py:requirements.txt"
-)
-
-load("@py_test_deps//:requirements.bzl", "pip_install")
-pip_install()
+pip_install(
+    name = "pylinter_deps",
+    requirements = "//tools/linter:requirements.txt",
+)

docker/setup_nox.sh

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ set -e
 post=${1:-""}

 # fetch bazel executable
-BAZEL_VERSION=4.2.1
+BAZEL_VERSION=5.1.1
 ARCH=$(uname -m)
 if [[ "$ARCH" == "aarch64" ]]; then ARCH="arm64"; fi
 wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel

tests/util/util.h

Lines changed: 1 addition & 1 deletion
@@ -1,8 +1,8 @@
 #pragma once

+#include <ATen/ATen.h>
 #include <string>
 #include <vector>
-#include <ATen/ATen.h>
 #include "ATen/Tensor.h"
 #include "core/ir/ir.h"
 #include "core/util/prelude.h"
