
Torch-TensorRT 1.1.0 #971


Merged (2 commits) on Apr 12, 2022
2 changes: 1 addition & 1 deletion .bazelversion
@@ -1 +1 @@
4.2.1
5.1.1
9 changes: 5 additions & 4 deletions core/lowering/passes/linear_to_addmm.cpp
@@ -1,15 +1,15 @@

#include <torch/csrc/jit/runtime/operator.h>
#include "core/util/prelude.h"
Contributor: Is this change related to the linter?

Collaborator (author): Yeah, a bunch of people were failing the linter, so I'm fixing it all at once.
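For anyone hitting the same failures locally, here is a minimal sketch of that kind of formatting pass, assuming the linter is clang-format driven and a .clang-format config exists at the repo root (the directory list below is illustrative, not the repo's exact lint target):

# Hypothetical local formatting pass; the style file and the core/tests
# directory list are assumptions, not the project's official lint command.
find core tests \( -name '*.cpp' -o -name '*.h' \) -print0 \
  | xargs -0 clang-format -i -style=file

The Dockerfile.docs change below pins clang-format-9 and symlinks it to clang-format, so the same invocation should also work inside that container.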

#include "torch/csrc/jit/api/function_impl.h"
#include "torch/csrc/jit/ir/alias_analysis.h"
#include "torch/csrc/jit/jit_log.h"
#include "torch/csrc/jit/passes/constant_propagation.h"
#include "torch/csrc/jit/passes/dead_code_elimination.h"
#include "torch/csrc/jit/passes/guard_elimination.h"
#include "torch/csrc/jit/passes/peephole.h"
#include "torch/csrc/jit/runtime/graph_executor.h"
#include "torch/csrc/jit/api/function_impl.h"
#include "torch/csrc/jit/passes/subgraph_rewrite.h"
#include "core/util/prelude.h"
#include "torch/csrc/jit/runtime/graph_executor.h"

namespace torch_tensorrt {
namespace core {
@@ -34,7 +34,8 @@ void replaceLinearWithBiasNonePattern(std::shared_ptr<torch::jit::Graph> graph)
continue;
} else {
torch::jit::WithInsertPoint guard(*it);
std::shared_ptr<torch::jit::Graph> d_graph = toGraphFunction(decompose_funcs.get_function("linear")).graph();;
std::shared_ptr<torch::jit::Graph> d_graph = toGraphFunction(decompose_funcs.get_function("linear")).graph();
;
torch::jit::Value* new_output = insertGraph(*it->owningGraph(), *d_graph, it->inputs()).at(0);
new_output->setType(it->output()->type());
it->output()->replaceAllUsesWith(new_output);
7 changes: 4 additions & 3 deletions core/lowering/passes/reduce_gelu.cpp
@@ -12,8 +12,8 @@ void ReduceGelu(std::shared_ptr<torch::jit::Graph>& graph) {
%out : Tensor = aten::gelu(%x)
return (%out))IR";

// This gelu_approximate_pattern schema exists in 21.11, 21.12, 22.01 containers of pytorch. These container versions use
// an unmerged PR in pytorch : https://github.com/pytorch/pytorch/pull/61439. We reduce this to regular Gelu.
// This gelu_approximate_pattern schema exists in 21.11, 21.12, 22.01 containers of pytorch. These container versions
// use an unmerged PR in pytorch : https://github.com/pytorch/pytorch/pull/61439. We reduce this to regular Gelu.
std::string gelu_approximate_pattern = R"IR(
graph(%x : Tensor, %approx):
%out : Tensor = aten::gelu(%x, %approx)
@@ -64,7 +64,8 @@ void ReduceGelu(std::shared_ptr<torch::jit::Graph>& graph) {
map_gelu_to_pointwise_ops.runOnGraph(graph);

torch::jit::SubgraphRewriter map_gelu_approximate_to_pointwise_ops;
map_gelu_approximate_to_pointwise_ops.RegisterRewritePattern(gelu_approximate_pattern, gelu_reduce_multi_input_pattern);
map_gelu_approximate_to_pointwise_ops.RegisterRewritePattern(
gelu_approximate_pattern, gelu_reduce_multi_input_pattern);
map_gelu_approximate_to_pointwise_ops.runOnGraph(graph);

LOG_GRAPH("Post lowering of [aten::gelu] -> " << *graph);
26 changes: 15 additions & 11 deletions core/partitioning/partitioning.cpp
@@ -231,11 +231,11 @@ std::unordered_map<torch::jit::Value*, usage_info> getInputUsageCounts(
return usage_counts;
}

std::unordered_map<size_t, std::list<SegmentedBlock>::iterator>
getIdxtoIterMap(std::list<SegmentedBlock> &segmented_blocks_list) {
std::unordered_map<size_t, std::list<SegmentedBlock>::iterator> getIdxtoIterMap(
std::list<SegmentedBlock>& segmented_blocks_list) {
std::unordered_map<size_t, std::list<SegmentedBlock>::iterator> idx_to_iter;
auto iter = segmented_blocks_list.begin();
for (int i = 0; i < segmented_blocks_list.size(); ++i, ++iter) {
for (uint64_t i = 0; i < segmented_blocks_list.size(); ++i, ++iter) {
idx_to_iter[i] = iter;
}
return idx_to_iter;
@@ -283,22 +283,24 @@ void resolveNonTensorInputBlocks(PartitionedGraph& segmented_blocks) {
}

void resolveTensorListInputBlocks(PartitionedGraph& segmented_blocks) {
// usage_counts is a map with key as non-tensor/tensorlist inputs and value as the idx of segmented block which produces/contains it.
auto usage_counts = getInputUsageCounts(
segmented_blocks, [](torch::jit::Value* input) -> bool { return isTensorList(input); });
// usage_counts is a map with key as non-tensor/tensorlist inputs and value as the idx of segmented block which
// produces/contains it.
auto usage_counts =
getInputUsageCounts(segmented_blocks, [](torch::jit::Value* input) -> bool { return isTensorList(input); });

// Get idx of the segblock to its iterator mapping
std::list<SegmentedBlock> segmented_blocks_list(segmented_blocks.cbegin(), segmented_blocks.cend());
auto idx_to_iter = getIdxtoIterMap(segmented_blocks_list);

std::unordered_set<int> updated_segments;
// we need to re-segment TensorRT segments whose inputs are TensorLists
for (auto &use : usage_counts) {
for (auto& use : usage_counts) {
auto use_info = use.second;
// For a particular tensorlist input, traverse through all ids of segmented blocks whose target is TensorRT
for (auto i : use_info.tensorrt_use_id) {
if (!updated_segments.count(i)) {
// tensorlistinput_to_segblock is a mapping from {tensorlist input : segmented block which produced this tensorlist input}
// tensorlistinput_to_segblock is a mapping from {tensorlist input : segmented block which produced this
// tensorlist input}
std::unordered_map<torch::jit::Value*, SegmentedBlock> tensorlistinput_to_segblock;
for (auto input : segmented_blocks[i].raw_inputs()) {
if (isTensorList(input)) {
@@ -308,18 +310,20 @@ void resolveTensorListInputBlocks(PartitionedGraph& segmented_blocks) {

// For each tensorlist input in tensorlistinput_to_segblock, get the node which actually uses this input.
// Once we retrieve the node, we remove it from the current TensorRT segmented_blocks[i]. This node should be
// added to block that generated/produced (can be obtained via produce_id) this tensorlist input in the first place.
// added to block that generated/produced (can be obtained via produce_id) this tensorlist input in the first
// place.
auto seg_blocks = segmentBlocksWithTensorListInputs(segmented_blocks[i], tensorlistinput_to_segblock);
auto append_blocks = seg_blocks.first;
auto trt_block = seg_blocks.second;
// Remove the current TensorRT seg_block and replace it with new TRT block (non empty) which has the node that uses tensorlist input removed.
// Remove the current TensorRT seg_block and replace it with new TRT block (non empty) which has the node that
// uses tensorlist input removed.
auto next_iter = segmented_blocks_list.erase(idx_to_iter[i]);
if (trt_block.raw_nodes().size() > 0) {
segmented_blocks_list.insert(next_iter, trt_block);
}

// append blocks' nodes to the producer seg_block
for (auto append_block: append_blocks) {
for (auto append_block : append_blocks) {
auto input = append_block.first; // corresponds to the tensorlist input
auto block = append_block.second;
// append nodes to segmented_blocks_list
2 changes: 1 addition & 1 deletion core/partitioning/shape_analysis.cpp
@@ -1,5 +1,5 @@
#include <ATen/ATen.h>
#include "core/partitioning/shape_analysis.h"
#include <ATen/ATen.h>
#include "core/util/prelude.h"
#include "torch/csrc/jit/api/module.h"
#include "torch/csrc/jit/passes/constant_pooling.h"
2 changes: 1 addition & 1 deletion docker/Dockerfile
@@ -9,7 +9,7 @@ RUN rm -rf /opt/pytorch/torch_tensorrt /usr/bin/bazel

ARG ARCH="x86_64"
ARG TARGETARCH="amd64"
ARG BAZEL_VERSION=4.2.1
ARG BAZEL_VERSION=5.1.1

RUN [[ "$TARGETARCH" == "amd64" ]] && ARCH="x86_64" || ARCH="${TARGETARCH}" \
&& wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel \
4 changes: 2 additions & 2 deletions docker/Dockerfile.docs
@@ -3,8 +3,8 @@ FROM nvcr.io/nvidia/tensorrt:22.01-py3
RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add -
RUN echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list

RUN apt-get update && apt-get install -y bazel-4.2.1 clang-format-9 libjpeg9 libjpeg9-dev
RUN ln -s /usr/bin/bazel-4.2.1 /usr/bin/bazel
RUN apt-get update && apt-get install -y bazel-5.1.1 clang-format-9 libjpeg9 libjpeg9-dev
RUN ln -s /usr/bin/bazel-5.1.1 /usr/bin/bazel
RUN ln -s $(which clang-format-9) /usr/bin/clang-format

# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container
52 changes: 20 additions & 32 deletions docker/WORKSPACE.docs
@@ -3,33 +3,32 @@ workspace(name = "Torch-TensorRT")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")

git_repository(
http_archive(
name = "rules_python",
remote = "https://github.com/bazelbuild/rules_python.git",
commit = "4fcc24fd8a850bdab2ef2e078b1de337eea751a6",
shallow_since = "1589292086 -0400"
sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f",
url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz",
)

load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()

load("@rules_python//python:pip.bzl", "pip_repositories", "pip3_import")
pip_repositories()
load("@rules_python//python:pip.bzl", "pip_install")

http_archive(
name = "rules_pkg",
url = "https://github.com/bazelbuild/rules_pkg/releases/download/0.2.4/rules_pkg-0.2.4.tar.gz",
sha256 = "4ba8f4ab0ff85f2484287ab06c0d871dcb31cc54d439457d28fd4ae14b18450a",
sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
"https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
],
)

load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")

rules_pkg_dependencies()

git_repository(
name = "googletest",
remote = "https://github.com/google/googletest",
commit = "703bd9caab50b139428cea1aaff9974ebee5742e",
shallow_since = "1570114335 -0400"
remote = "https://github.com/google/googletest",
shallow_since = "1570114335 -0400",
)

# CUDA should be installed on the system locally
@@ -52,17 +51,17 @@ new_local_repository(
http_archive(
name = "libtorch",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "190e963e739d5f7c2dcf94b3994de8fcd335706a4ebb333812ea7d8c841beb06",
sha256 = "8d9e829ce9478db4f35bdb7943308cf02e8a2f58cf9bb10f742462c1d57bf287",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.10.0%2Bcu113.zip"],
urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.11.0%2Bcu113.zip"],
)

http_archive(
name = "libtorch_pre_cxx11_abi",
build_file = "@//third_party/libtorch:BUILD",
sha256 = "0996a6a4ea8bbc1137b4fb0476eeca25b5efd8ed38955218dec1b73929090053",
sha256 = "90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.10.0%2Bcu113.zip"],
urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip"],
)

####################################################################################
@@ -84,18 +83,7 @@ new_local_repository(
#########################################################################
# Testing Dependencies (optional - comment out on aarch64)
#########################################################################
pip3_import(
name = "torch_tensorrt_py_deps",
requirements = "//py:requirements.txt"
)

load("@torch_tensorrt_py_deps//:requirements.bzl", "pip_install")
pip_install()

pip3_import(
name = "py_test_deps",
requirements = "//tests/py:requirements.txt"
)

load("@py_test_deps//:requirements.bzl", "pip_install")
pip_install()
pip_install(
name = "pylinter_deps",
requirements = "//tools/linter:requirements.txt",
)
2 changes: 1 addition & 1 deletion docker/setup_nox.sh
@@ -7,7 +7,7 @@ set -e
post=${1:-""}

# fetch bazel executable
BAZEL_VERSION=4.2.1
BAZEL_VERSION=5.1.1
ARCH=$(uname -m)
if [[ "$ARCH" == "aarch64" ]]; then ARCH="arm64"; fi
wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel
2 changes: 1 addition & 1 deletion tests/util/util.h
@@ -1,8 +1,8 @@
#pragma once

#include <ATen/ATen.h>
#include <string>
#include <vector>
#include <ATen/ATen.h>
Contributor: Is this also related to the linter?

Collaborator (author): Yes.

#include "ATen/Tensor.h"
#include "core/ir/ir.h"
#include "core/util/prelude.h"