Skip to content

Commit fd8548d

Browse files
authored
Merge pull request #1471 from pytorch/master
Update release branch
2 parents a9a4bb2 + 0471f2d commit fd8548d

File tree

214 files changed

+4425
-572
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

214 files changed

+4425
-572
lines changed

.github/code-owners.yml

Lines changed: 0 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -9,7 +9,6 @@
99

1010
"component: build system":
1111
- "narendasan"
12-
- "andi4191"
1312

1413
"component: conversion":
1514
- "narendasan"
@@ -29,7 +28,6 @@
2928
- "peri044"
3029

3130
"component: execution":
32-
- "andi4191"
3331
- "narendasan"
3432

3533
"component: lowering":
@@ -48,15 +46,12 @@
4846
- "peri044"
4947

5048
"component: runtime":
51-
- "andi4191"
5249
- "narendasan"
5350

5451
"component: tests":
55-
- "andi4191"
5652
- "narendasan"
5753

5854
"component: torchtrtc":
59-
- "andi4191"
6055
- "narendasan"
6156

6257
"component: dependencies":
@@ -74,24 +69,20 @@
7469
- "tanayvarshney"
7570

7671
"infrastructre":
77-
- "andi4191"
7872
- "narendasan"
7973

8074
"component: packaging":
8175
- "narendasan"
82-
- "andi4191"
8376
- "peri044"
8477

8578
"channel: NGC":
86-
- "andi4191"
8779
- "peri044"
8880

8981
"channel: linux-x86":
9082
- "narendasan"
9183
- "peri044"
9284

9385
"channel: linux-sbsa":
94-
- "andi4191"
9586
- "bowang007"
9687

9788
"channel: windows":
@@ -102,16 +93,13 @@
10293
- "bowang007"
10394

10495
"component: tooling":
105-
- "andi4191"
10696
- "narendasan"
10797

10898
"performance":
109-
- "andi4191"
11099
- "peri044"
111100
- "bowang007"
112101

113102
"channel: docker":
114-
- "andi4191"
115103
- "narendasan"
116104

117105
"ux":

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -2,8 +2,8 @@
22
cmake_minimum_required(VERSION 3.17)
33
project(Torch-TensorRT LANGUAGES CXX)
44

5-
# use c++17
6-
set(CMAKE_CXX_STANDARD 17)
5+
# use c++14 like PyTorch
6+
set(CMAKE_CXX_STANDARD 14)
77

88
# Build the libraries with -fPIC
99
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

core/compiler.cpp

Lines changed: 33 additions & 19 deletions
Original file line number · Diff line number · Diff line change
@@ -31,11 +31,17 @@ void AddEngineToGraph(
3131
torch::jit::script::Module mod,
3232
std::shared_ptr<torch::jit::Graph>& g,
3333
const std::string& serialized_engine,
34-
runtime::CudaDevice& device_info,
34+
runtime::RTDevice& device_info,
35+
const std::vector<std::string>& input_binding_names,
36+
const std::vector<std::string>& output_binding_names,
3537
std::string engine_id = "",
3638
bool fallback = false) {
3739
auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(
38-
mod._ivalue()->name() + "_engine_" + engine_id, serialized_engine, device_info);
40+
mod._ivalue()->name() + "_engine_" + engine_id,
41+
serialized_engine,
42+
device_info,
43+
input_binding_names,
44+
output_binding_names);
3945
// Get required metadata about the engine out
4046
auto num_io = engine_ptr->num_io;
4147
auto name = engine_ptr->name;
@@ -137,10 +143,13 @@ partitioning::GraphAndMapping BuildHybridGraph(
137143
auto partitioning_info = cfg.partitioning_info;
138144

139145
auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info);
140-
auto collection_input_ivalues_map =
141-
partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types);
146+
partitioning_ctx.input_types_map = first_use_types;
142147

143-
partitioning::partition(&partitioning_ctx, collection_input_ivalues_map);
148+
// Generate a dictionary of input torch::jit::Value's to their min, opt, max tensors and store in ctx
149+
// TODO: Combine this within partition call
150+
partitioning::populateInputIValues(&partitioning_ctx);
151+
152+
partitioning::partition(&partitioning_ctx);
144153

145154
for (auto& partitioned_block : partitioning_ctx.partitioned_blocks) {
146155
partitioning::PartitionedGraph& segmented_blocks = partitioned_block.second;
@@ -151,23 +160,24 @@ partitioning::GraphAndMapping BuildHybridGraph(
151160
trt_engine_id << reinterpret_cast<const int*>(&seg_block);
152161

153162
if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
154-
auto shapes = seg_block.in_shapes();
155-
auto types = seg_block.in_types();
156-
std::vector<ir::Input> inputs;
157-
for (size_t i = 0; i < shapes.size(); i++) {
158-
auto in = ir::Input(shapes[i]);
159-
in.dtype = util::ScalarTypeToTRTDataType(types[i]);
160-
inputs.push_back(in);
161-
}
163+
auto inputs = seg_block.construct_inputs_spec();
162164
// update the input ranges for each segments
163165
convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params);
164166

165167
// TODO mapping Inputs Ivalue to flatten one here
166168
auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_info, static_params);
167169
auto temp_g = std::make_shared<torch::jit::Graph>();
168170
auto device_spec = convert_info.engine_settings.device;
169-
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
170-
AddEngineToGraph(new_mod, temp_g, engine, cuda_device, trt_engine_id.str(), true);
171+
auto cuda_device = runtime::RTDevice(device_spec.gpu_id, device_spec.device_type);
172+
AddEngineToGraph(
173+
new_mod,
174+
temp_g,
175+
engine,
176+
cuda_device,
177+
std::vector<std::string>(),
178+
std::vector<std::string>(),
179+
trt_engine_id.str(),
180+
true);
171181

172182
seg_block.update_graph(temp_g);
173183
}
@@ -283,7 +293,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
283293
torch::jit::Module new_mod(mod._ivalue()->name() + "_trt");
284294

285295
auto device_spec = cfg.convert_info.engine_settings.device;
286-
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
296+
auto cuda_device = runtime::RTDevice(device_spec.gpu_id, device_spec.device_type);
287297

288298
for (const torch::jit::Method& method : mod.get_methods()) {
289299
if (method.name().compare("forward") == 0) {
@@ -331,7 +341,7 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
331341
"Not all operations in graph are supported by the compiler");
332342
// TODO find the right
333343
auto engine = conversion::ConvertBlockToEngine(g->block(), cfg.convert_info, static_params);
334-
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
344+
AddEngineToGraph(new_mod, new_g, engine, cuda_device, std::vector<std::string>(), std::vector<std::string>());
335345
}
336346
auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
337347
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
@@ -342,12 +352,16 @@ torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec cfg)
342352
return new_mod;
343353
}
344354

345-
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device) {
355+
torch::jit::script::Module EmbedEngineInNewModule(
356+
const std::string& engine,
357+
runtime::RTDevice cuda_device,
358+
const std::vector<std::string>& input_binding_names,
359+
const std::vector<std::string>& output_binding_names) {
346360
std::ostringstream engine_id;
347361
engine_id << reinterpret_cast<const int*>(&engine);
348362
torch::jit::script::Module new_mod("tensorrt_engine_mod_" + engine_id.str());
349363
auto new_g = std::make_shared<torch::jit::Graph>();
350-
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
364+
AddEngineToGraph(new_mod, new_g, engine, cuda_device, input_binding_names, output_binding_names);
351365
auto new_method = new_mod._ivalue()->compilation_unit()->create_function("forward", new_g);
352366
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
353367
new_mod.type()->addMethod(new_method);

core/compiler.h

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -28,7 +28,11 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::
2828

2929
torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg);
3030

31-
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device);
31+
torch::jit::script::Module EmbedEngineInNewModule(
32+
const std::string& engine,
33+
runtime::RTDevice cuda_device,
34+
const std::vector<std::string>& input_binding_names,
35+
const std::vector<std::string>& output_binding_names);
3236

3337
void set_device(const int gpu_id);
3438

core/conversion/conversionctx/BUILD

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -21,6 +21,7 @@ cc_library(
2121
deps = [
2222
"@tensorrt//:nvinfer",
2323
"//core/util:prelude",
24+
"//core/ir",
2425
] + select({
2526
":use_pre_cxx11_abi": ["@libtorch_pre_cxx11_abi//:libtorch"],
2627
"//conditions:default": ["@libtorch//:libtorch"],

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,21 @@
99
#include "torch/csrc/jit/ir/ir.h"
1010

1111
#include <cuda_runtime.h>
12+
#include "core/ir/ir.h"
1213
#include "core/util/prelude.h"
1314

1415
namespace torch_tensorrt {
1516
namespace core {
1617
namespace conversion {
1718

18-
struct Device {
19-
nvinfer1::DeviceType device_type;
20-
int64_t gpu_id;
21-
int64_t dla_core;
22-
bool allow_gpu_fallback;
23-
Device() : device_type(nvinfer1::DeviceType::kGPU), gpu_id(0), dla_core(0), allow_gpu_fallback(false) {}
24-
};
25-
2619
struct BuilderSettings {
2720
std::set<nvinfer1::DataType> enabled_precisions = {};
2821
bool sparse_weights = false;
2922
bool disable_tf32 = false;
3023
bool refit = false;
3124
bool debug = false;
3225
bool truncate_long_and_double = false;
33-
Device device;
26+
ir::Device device;
3427
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
3528
nvinfer1::IInt8Calibrator* calibrator = nullptr;
3629
uint64_t num_avg_timing_iters = 1;

core/conversion/converters/converter_util.cpp

Lines changed: 32 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -156,6 +156,38 @@ nvinfer1::ILayer* add_elementwise(
156156
return ele;
157157
}
158158

159+
nvinfer1::ITensor* add_abs(
160+
ConversionCtx* ctx,
161+
const torch::jit::Node* n,
162+
nvinfer1::ITensor* self,
163+
const std::string& name) {
164+
nvinfer1::ILayer* absolute_value_layer;
165+
166+
// Check if TRT Unary ops support the input type
167+
bool unary_supported_input = (self->getType() == nvinfer1::DataType::kFLOAT) ||
168+
(self->getType() == nvinfer1::DataType::kHALF) || (self->getType() == nvinfer1::DataType::kINT8);
169+
if (unary_supported_input) {
170+
absolute_value_layer = ctx->net->addUnary(*self, nvinfer1::UnaryOperation::kABS);
171+
TORCHTRT_CHECK(absolute_value_layer, "Unable to create abs layer from node: " << *n);
172+
absolute_value_layer->setName(name.c_str());
173+
} else {
174+
LOG_GRAPH(
175+
"Tensor is of unsupported type "
176+
<< self->getType() << " for IUnaryLayer::kABS. Using backup implementation via IElementWise (max(x, -x)");
177+
// For types not supported by kABS, use an elementwise implementation abs(x) = max(x, -1 * x)
178+
at::Tensor neg_one = torch::full({1}, -1).to(util::TRTDataTypeToScalarType(self->getType()));
179+
auto neg_one_const = tensor_to_const(ctx, neg_one);
180+
auto neg_layer = add_elementwise(
181+
ctx, nvinfer1::ElementWiseOperation::kPROD, self, neg_one_const, util::node_info(n) + std::string("_Negation"));
182+
TORCHTRT_CHECK(neg_layer, "Unable to create prod layer from node: " << *n);
183+
absolute_value_layer =
184+
add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, self, neg_layer->getOutput(0), name);
185+
TORCHTRT_CHECK(absolute_value_layer, "Unable to create max layer from node: " << *n);
186+
}
187+
188+
return absolute_value_layer->getOutput(0);
189+
}
190+
159191
nvinfer1::ITensor* applyIdentityOp(ConversionCtx* ctx, nvinfer1::ITensor* tensor, const std::string& tensor_name) {
160192
auto id_layer = ctx->net->addIdentity(*tensor);
161193
auto id_out_tensor = id_layer->getOutput(0);

core/conversion/converters/converter_util.h

Lines changed: 8 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -35,13 +35,21 @@ nvinfer1::ITensor* addUnpadding(
3535
bool trailing = true,
3636
bool use_zeros = true);
3737

38+
// TODO: Change add_elementwise schema to output nvinfer1::ITensor* instead of nvinfer1::ILayer*,
39+
// for consistency with other utils. Need to change schema and usage in all calling contexts
3840
nvinfer1::ILayer* add_elementwise(
3941
ConversionCtx* ctx,
4042
nvinfer1::ElementWiseOperation op,
4143
nvinfer1::ITensor* self,
4244
nvinfer1::ITensor* other,
4345
const std::string& name);
4446

47+
nvinfer1::ITensor* add_abs(
48+
ConversionCtx* ctx,
49+
const torch::jit::Node* n,
50+
nvinfer1::ITensor* self,
51+
const std::string& name);
52+
4553
// Apply an identity operation on a tensor. Used in the case where an input is an output to a network.
4654
nvinfer1::ITensor* applyIdentityOp(ConversionCtx* ctx, nvinfer1::ITensor* tensor, const std::string& name);
4755

core/conversion/converters/impl/einsum.cpp

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -18,6 +18,13 @@ auto einsum_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pat
1818
auto equation = args[0].unwrapToString();
1919
auto in = args[1].IValue()->toListRef();
2020

21+
TORCHTRT_CHECK(
22+
in.size() <= 2,
23+
"TensorRT currently supports up to 2 input tensors "
24+
<< "to einsum but operation had " << in.size()
25+
<< " input tensors, please specify torch_executed_ops=[\"aten::einsum\"] "
26+
<< "at compilation time to avoid this error.");
27+
2128
std::vector<nvinfer1::ITensor*> tensors;
2229

2330
// Populate vector of ITensor pointers

0 commit comments

Comments (0)