
Commit 19c91f2

Merge branch 'master' of https://github.com/NVIDIA/TRTorch
2 parents a4c8511 + ac4ac5e

14 files changed: +817 −62 lines

.bazelversion

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+3.2.0

README.md

Lines changed: 19 additions & 1 deletion

@@ -68,6 +68,7 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
 
 ### Dependencies
 
+- Bazel 3.2.0
 - Libtorch 1.5.0
 - CUDA 10.2
 - cuDNN 7.6.5
@@ -81,7 +82,24 @@ Releases: https://github.com/NVIDIA/TRTorch/releases
 
 ### Installing Dependencies
 
-You need to start by having CUDA installed on the system, Libtorch will automatically be pulled for you by bazel,
+#### 0. Install Bazel
+
+If you don't have bazel installed, the easiest way is to install bazelisk using the method of your choosing: https://github.com/bazelbuild/bazelisk
+
+Otherwise you can use the following instructions to install binaries: https://docs.bazel.build/versions/master/install.html
+
+Finally, if you need to compile from source (e.g. aarch64, until bazel distributes binaries for that architecture), you can use these instructions:
+
+```sh
+export BAZEL_VERSION=<VERSION>
+mkdir bazel
+cd bazel
+curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip
+unzip bazel-$BAZEL_VERSION-dist.zip
+bash ./compile.sh
+```
+
+You need to start by having CUDA installed on the system; LibTorch will automatically be pulled for you by bazel,
 then you have two options.
 
 #### 1. Building using cuDNN & TensorRT tarball distributions
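Since the dependency list above pins exact versions, a quick sanity check of the local toolchain before building can save a failed compile. A minimal sketch (a hypothetical helper script, not part of this commit; assumes a Python environment with the matching torch wheel installed):

```python
import torch

# Versions pinned in the README's Dependencies section (assumed targets).
# torch.backends.cudnn.version() encodes cuDNN 7.6.5 as the integer 7605.
print("torch :", torch.__version__)               # expect 1.5.0
print("CUDA  :", torch.version.cuda)              # expect 10.2
print("cuDNN :", torch.backends.cudnn.version())  # expect 7605

assert torch.__version__.startswith("1.5.0"), "README pins Libtorch 1.5.0"
assert torch.version.cuda == "10.2", "README pins CUDA 10.2"
```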

core/conversion/conversion.cpp

Lines changed: 25 additions & 7 deletions

@@ -201,12 +201,27 @@ void MapIValues(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> in_l
   });
 
   for (auto p : input_output_pairs) {
-    auto input = ctx->evaluated_value_map[p.first];
-    ctx->evaluated_value_map[p.second] = torch::jit::IValue(input);
+    if (ctx->evaluated_value_map.find(p.first) != ctx->evaluated_value_map.end()) {
+      auto input = ctx->evaluated_value_map[p.first];
+      ctx->evaluated_value_map[p.second] = torch::jit::IValue(input);
+    } else if (ctx->value_tensor_map.find(p.first) != ctx->value_tensor_map.end()) {
+      auto input = ctx->value_tensor_map[p.first];
+      ctx->value_tensor_map[p.second] = input;
+    } else {
+      TRTORCH_THROW_ERROR("Cannot find Value " << p.first->debugName() << " in either the evaluated value or tensor maps (MapIValues)");
+    }
   }
 }
 
-void EvaluateConditionalBlock(ConversionCtx* ctx, const torch::jit::Node* n) {
+void EvaluateConditionalBlock(ConversionCtx* ctx, const torch::jit::Node* n, bool contained_in_loop = false) {
+  bool output_type_includes_tensor = false;
+  for (auto o : n->outputs()) {
+    if (o->type()->isSubtypeOf(c10::TensorType::get())) {
+      output_type_includes_tensor = true;
+    }
+  }
+  TRTORCH_CHECK(!(contained_in_loop && output_type_includes_tensor), "TRTorch currently cannot compile conditionals within loops");
+
   auto condition = ctx->evaluated_value_map[n->input(0)].toBool();
   LOG_DEBUG(ctx->logger, "(Conditional Evaluation) Evaluating block " << (int) condition);
   auto b = condition ? n->blocks()[0] : n->blocks()[1];
@@ -215,16 +230,19 @@ void EvaluateConditionalBlock(ConversionCtx* ctx, const torch::jit::Node* n) {
     if (bn->kind() == torch::jit::prim::Loop) {
       EvaluateLoopBlock(ctx, bn);
     } else if (bn->kind() == torch::jit::prim::If) {
-      EvaluateConditionalBlock(ctx, bn);
-    } else {
-      TRTORCH_CHECK(evaluators::shouldEvalAtConversionTime(bn), "TRTorch currently can only compile conditionals that are evaluatable at conversion time but node " << *bn << " cannot be evaluated.")
+      EvaluateConditionalBlock(ctx, bn, contained_in_loop);
+    } else if (evaluators::shouldEvalAtConversionTime(bn)) {
       auto eval = EvaluateNode(ctx, bn);
       if (!eval.value().isTensor()) {
         LOG_DEBUG(ctx->logger, "(Conditional Evaluation) Found the value to be: " << eval.value());
       } else {
         LOG_DEBUG(ctx->logger, "(Conditional Evaluation) Found the value to be a tensor (shape " << eval.value().toTensor().sizes() << ')');
       }
       ctx->AssociateValueAndIValue(bn->output(0), eval.value());
+    } else if (converters::node_is_convertable(bn)) {
+      AddLayer(ctx, bn);
+    } else {
+      TRTORCH_THROW_ERROR("TRTorch is unable to compile this conditional, a converter or evaluator is not available for node " << *bn);
     }
   }
 
@@ -251,7 +269,7 @@ void EvaluateLoopBlock(ConversionCtx* ctx, const torch::jit::Node* n) {
     if (bn->kind() == torch::jit::prim::Loop) {
       EvaluateLoopBlock(ctx, n);
     } else if (bn->kind() == torch::jit::prim::If) {
-      EvaluateConditionalBlock(ctx, bn);
+      EvaluateConditionalBlock(ctx, bn, true);
     } else {
       TRTORCH_CHECK(evaluators::shouldEvalAtConversionTime(bn), "TRTorch currently can only compile loops that are evaluatable at conversion time but node " << *bn << " cannot be evaluated.");
       auto eval = EvaluateNode(ctx, bn);
core/conversion/converters/impl/pooling.cpp

Lines changed: 210 additions & 10 deletions

@@ -8,9 +8,117 @@ namespace converters {
 namespace impl {
 namespace {
 
+bool MaxPoolingConverter(ConversionCtx* ctx, const torch::jit::Node* n, args& args) {
+  auto in = args[0].ITensor();
+  auto shape = util::toVec(in->getDimensions());
+
+  // Max Pool needs at least 4D input
+  if (shape.size() < 4) {
+    auto new_shape = util::toDimsPad(shape, 4);
+    LOG_DEBUG("Input shape is less than 4D got: " << util::toDims(shape) << ", inserting shuffle layer to reshape to 4D tensor shape: " << new_shape);
+    auto shuffle = ctx->net->addShuffle(*in);
+    shuffle->setReshapeDimensions(new_shape);
+    shuffle->setName((util::node_info(n) + " [Reshape to " + util::toStr(new_shape) + ']').c_str());
+    in = shuffle->getOutput(0);
+  }
+
+  auto kernel_size = util::toDims(args[1].unwrapToIntList());
+  LOG_DEBUG("kernel_size: " << kernel_size);
+  auto padding = util::toDims(args[3].unwrapToIntList());
+  LOG_DEBUG("padding: " << padding);
+  auto stride = util::toDims(args[2].unwrapToIntList());
+  LOG_DEBUG("stride: " << stride);
+
+  auto dilation = util::toDims(args[4].unwrapToIntList());
+
+  TRTORCH_ASSERT(dilation == util::toDims(std::vector<int64_t>(dilation.nbDims, 1)), "Pooling dilation is not supported in TensorRT");
+
+  LOG_DEBUG("dilation: " << dilation);
+  LOG_WARNING("Dilation not used in max pooling converter");
+  bool ceil_mode = args[5].unwrapToBool();
+
+  auto new_layer = ctx->net->addPoolingNd(*in, nvinfer1::PoolingType::kMAX, kernel_size);
+  TRTORCH_CHECK(new_layer, "Unable to create Max Pooling layer from node: " << *n);
+
+  new_layer->setName(util::node_info(n).c_str());
+  new_layer->setPaddingNd(padding);
+  if (stride.nbDims != 2 && ctx->settings.device == nvinfer1::DeviceType::kDLA) {
+    if (!ctx->settings.allow_gpu_fallback) {
+      TRTORCH_THROW_ERROR("DLA Pooling stride is limited to 2D, allow GPU fallback");
+    } else {
+      LOG_WARNING("DLA Pooling stride is limited to 2D, will run on GPU");
+    }
+  }
+  new_layer->setStrideNd(stride);
+
+  auto padding_mode = ceil_mode ? nvinfer1::PaddingMode::kEXPLICIT_ROUND_UP : nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN;
+  new_layer->setPaddingMode(padding_mode);
+
+  new_layer->setName(util::node_info(n).c_str());
+  auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
+
+  LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+  return true;
+}
+
+bool AvgPoolingConverter(ConversionCtx* ctx, const torch::jit::Node* n, args& args) {
+  auto in = args[0].ITensor();
+  auto shape = util::toVec(in->getDimensions());
+
+  // Avg Pool needs at least 4D input
+  if (shape.size() < 4) {
+    auto new_shape = util::toDimsPad(shape, 4);
+    LOG_DEBUG("Input shape is less than 4D got: " << util::toDims(shape) << ", inserting shuffle layer to reshape to 4D tensor shape: " << new_shape);
+    auto shuffle = ctx->net->addShuffle(*in);
+    shuffle->setReshapeDimensions(new_shape);
+    shuffle->setName((util::node_info(n) + " [Reshape to " + util::toStr(new_shape) + ']').c_str());
+    in = shuffle->getOutput(0);
+  }
+
+  auto kernel_size = util::toDims(args[1].unwrapToIntList());
+  LOG_DEBUG("kernel_size: " << kernel_size);
+  auto padding = util::toDims(args[3].unwrapToIntList());
+  LOG_DEBUG("padding: " << padding);
+  auto stride = util::toDims(args[2].unwrapToIntList());
+  LOG_DEBUG("stride: " << stride);
+
+  bool ceil_mode = args[4].unwrapToBool();
+  bool count_include_pad = args[5].unwrapToBool();
+
+  auto new_layer = ctx->net->addPoolingNd(*in, nvinfer1::PoolingType::kAVERAGE, kernel_size);
+  TRTORCH_CHECK(new_layer, "Unable to create Avg Pooling layer from node: " << *n);
+
+  new_layer->setName(util::node_info(n).c_str());
+  new_layer->setPaddingNd(padding);
+  if (stride.nbDims != 2 && ctx->settings.device == nvinfer1::DeviceType::kDLA) {
+    if (!ctx->settings.allow_gpu_fallback) {
+      TRTORCH_THROW_ERROR("DLA Pooling stride is limited to 2D, allow GPU fallback");
+    } else {
+      LOG_WARNING("DLA Pooling stride is limited to 2D, will run on GPU");
+    }
+  }
+  new_layer->setStrideNd(stride);
+
+  auto padding_mode = ceil_mode ? nvinfer1::PaddingMode::kEXPLICIT_ROUND_UP : nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN;
+  new_layer->setPaddingMode(padding_mode);
+  new_layer->setAverageCountExcludesPadding(!count_include_pad);
+
+  if (!(args[6].IValue()->isNone())) {
+    LOG_WARNING("Divisor override is not handled by Avg Pooling Converter");
+  }
+
+  new_layer->setName(util::node_info(n).c_str());
+  auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
+
+  LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+  return true;
+}
+
 auto pooling_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
     .pattern({
-        "aten::max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=[0, 0], int[2] dilation=[1, 1], bool ceil_mode=False) -> (Tensor)",
+        "aten::max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=[], int[1] dilation=[], bool ceil_mode=False) -> (Tensor)",
         [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
            auto in = args[0].ITensor();
            auto shape = util::toVec(in->getDimensions());
@@ -25,38 +133,130 @@ auto pooling_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
            in = shuffle->getOutput(0);
        }
 
-
-       auto kernel_size = util::toDimsHW(args[1].unwrapToIntList());
+       auto kernel_vec = args[1].unwrapToIntList().vec();
+       kernel_vec.insert(kernel_vec.begin(), 1);
+       auto kernel_size = util::toDims(kernel_vec);
        LOG_DEBUG("kernel_size: " << kernel_size);
-       auto padding = util::toDimsHW(args[3].unwrapToIntList());
+       auto stride_vec = args[2].unwrapToIntList().vec();
+       stride_vec.insert(stride_vec.begin(), 1);
+       auto stride = util::toDims(stride_vec);
+       LOG_DEBUG("stride: " << stride);
+       auto padding_vec = args[3].unwrapToIntList().vec();
+       padding_vec.insert(padding_vec.begin(), 0);
+       auto padding = util::toDims(padding_vec);
        LOG_DEBUG("padding: " << padding);
+
        auto dilation = util::toDims(args[4].unwrapToIntList());
 
-       TRTORCH_ASSERT(dilation == util::toDims(std::vector<int64_t>({1,1})), "Pooling dilation is not supported in TensorRT");
+       TRTORCH_ASSERT(dilation == util::toDims(std::vector<int64_t>(dilation.nbDims, 1)), "Pooling dilation is not supported in TensorRT");
 
        LOG_DEBUG("dilation: " << dilation);
        LOG_WARNING("Dilation not used in max pooling converter");
-       bool ceil_mode = args[5].IValue()->to<bool>();
+       bool ceil_mode = args[5].unwrapToBool();
 
        auto new_layer = ctx->net->addPoolingNd(*in, nvinfer1::PoolingType::kMAX, kernel_size);
-       TRTORCH_CHECK(new_layer, "Unable to create Max Pool 2D layer from node: " << *n);
+       TRTORCH_CHECK(new_layer, "Unable to create Max Pooling layer from node: " << *n);
+
+       new_layer->setName(util::node_info(n).c_str());
+       new_layer->setPaddingNd(padding);
+       if (stride.nbDims != 2 && ctx->settings.device == nvinfer1::DeviceType::kDLA) {
+           if (!ctx->settings.allow_gpu_fallback) {
+               TRTORCH_THROW_ERROR("DLA Pooling stride is limited to 2D, allow GPU fallback");
+           } else {
+               LOG_WARNING("DLA Pooling stride is limited to 2D, will run on GPU");
+           }
+       }
+       new_layer->setStrideNd(stride);
+
+       auto padding_mode = ceil_mode ? nvinfer1::PaddingMode::kEXPLICIT_ROUND_UP : nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN;
+       new_layer->setPaddingMode(padding_mode);
+
+       new_layer->setName(util::node_info(n).c_str());
+       auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
+
+       LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+       return true;
+    }
+    }).pattern({
+        "aten::avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor",
+        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+           auto in = args[0].ITensor();
+           auto shape = util::toVec(in->getDimensions());
+
+           // Avg Pool needs at least 4D input
+           if (shape.size() < 4) {
+               auto new_shape = util::toDimsPad(shape, 4);
+               LOG_DEBUG("Input shape is less than 4D got: " << util::toDims(shape) << ", inserting shuffle layer to reshape to 4D tensor shape: " << new_shape);
+               auto shuffle = ctx->net->addShuffle(*in);
+               shuffle->setReshapeDimensions(new_shape);
+               shuffle->setName((util::node_info(n) + " [Reshape to " + util::toStr(new_shape) + ']').c_str());
+               in = shuffle->getOutput(0);
+           }
+
+           auto kernel_vec = args[1].unwrapToIntList().vec();
+           kernel_vec.insert(kernel_vec.begin(), 1);
+           auto kernel_size = util::toDims(kernel_vec);
+           LOG_DEBUG("kernel_size: " << kernel_size);
+           auto stride_vec = args[2].unwrapToIntList().vec();
+           stride_vec.insert(stride_vec.begin(), 1);
+           auto stride = util::toDims(stride_vec);
+           LOG_DEBUG("stride: " << stride);
+           auto padding_vec = args[3].unwrapToIntList().vec();
+           padding_vec.insert(padding_vec.begin(), 0);
+           auto padding = util::toDims(padding_vec);
+           LOG_DEBUG("padding: " << padding);
+
+           bool ceil_mode = args[4].unwrapToBool();
+           bool count_include_pad = args[5].unwrapToBool();
+
+           auto new_layer = ctx->net->addPoolingNd(*in, nvinfer1::PoolingType::kAVERAGE, kernel_size);
+           TRTORCH_CHECK(new_layer, "Unable to create Avg Pooling layer from node: " << *n);
 
        new_layer->setName(util::node_info(n).c_str());
        new_layer->setPaddingNd(padding);
-       if (args[2].unwrapToIntList().size() == 2) {
-           auto stride = util::toDims(args[2].unwrapToIntList());
-           new_layer->setStrideNd(stride);
+
+       if (stride.nbDims != 2 && ctx->settings.device == nvinfer1::DeviceType::kDLA) {
+           if (!ctx->settings.allow_gpu_fallback) {
+               TRTORCH_THROW_ERROR("DLA Pooling stride is limited to 2D, allow GPU fallback");
+           } else {
+               LOG_WARNING("DLA Pooling stride is limited to 2D, will run on GPU");
+           }
        }
 
+       new_layer->setStrideNd(stride);
+
        auto padding_mode = ceil_mode ? nvinfer1::PaddingMode::kEXPLICIT_ROUND_UP : nvinfer1::PaddingMode::kEXPLICIT_ROUND_DOWN;
        new_layer->setPaddingMode(padding_mode);
+       new_layer->setAverageCountExcludesPadding(!count_include_pad);
 
        new_layer->setName(util::node_info(n).c_str());
        auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
 
        LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
        return true;
    }
+    })
+    .pattern({
+        "aten::max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=[0, 0], int[2] dilation=[1, 1], bool ceil_mode=False) -> (Tensor)",
+        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+            return MaxPoolingConverter(ctx, n, args);
+        }
+    }).pattern({
+        "aten::avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=[0, 0], bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> (Tensor)",
+        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+            return AvgPoolingConverter(ctx, n, args);
+        }
+    }).pattern({
+        "aten::max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=[], int[3] dilation=[], bool ceil_mode=False) -> (Tensor)",
+        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+            return MaxPoolingConverter(ctx, n, args);
+        }
+    }).pattern({
+        "aten::avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=[], bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> (Tensor)",
+        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+            return AvgPoolingConverter(ctx, n, args);
+        }
    }).pattern({
        "aten::adaptive_avg_pool2d(Tensor self, int[2] output_size) -> (Tensor)",
        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
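The refactor above hoists the shared logic into `MaxPoolingConverter`/`AvgPoolingConverter` and registers them for the 2D and 3D schemas, while the new 1D patterns prepend a 1 to kernel/stride (and a 0 to padding) so TensorRT's N-D pooling sees at least two spatial dims. A minimal sketch (a hypothetical module in plain PyTorch, not part of this commit) showing the aten nodes these registrations now match:

```python
import torch
import torch.nn.functional as F

class Pools(torch.nn.Module):
    def forward(self, x: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
        a = F.max_pool1d(x, kernel_size=3, stride=2, padding=1)  # aten::max_pool1d
        b = F.avg_pool1d(x, kernel_size=3, stride=2, padding=1)  # aten::avg_pool1d
        c = F.max_pool3d(v, kernel_size=2)                       # aten::max_pool3d
        return a.sum() + b.sum() + c.sum()

# x: (N, C, L) for the 1d pools, v: (N, C, D, H, W) for the 3d pool
mod = torch.jit.script(Pools())
print(mod.graph)  # the schemas above appear as nodes in the TorchScript IR
```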

core/conversion/converters/impl/shuffle.cpp

Lines changed: 21 additions & 0 deletions

@@ -59,6 +59,27 @@ static auto shuffle_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
        auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle->getOutput(0));
        LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
 
+       return true;
+    }
+    }).pattern({
+        "aten::permute(Tensor(a) self, int[] dims) -> (Tensor(a))",
+        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+           auto in = args[0].ITensor();
+           auto in_shape = util::toVec(in->getDimensions());
+           auto new_order = args[1].unwrapToIntList().vec();
+
+           LOG_DEBUG("Shuffle to: " << util::toDims(new_order));
+
+           auto shuffle = ctx->net->addShuffle(*in);
+           TRTORCH_CHECK(shuffle, "Unable to create shuffle layer from node: " << *n);
+           nvinfer1::Permutation permute;
+           std::copy(new_order.begin(), new_order.end(), permute.order);
+           shuffle->setSecondTranspose(permute);
+           shuffle->setName(util::node_info(n).c_str());
+
+           auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle->getOutput(0));
+           LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+
        return true;
    }
 });
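The new `aten::permute` converter maps a dimension reordering onto an `IShuffleLayer` second transpose. A minimal sketch (a hypothetical module in plain PyTorch, not part of this commit) of a graph it would match:

```python
import torch

class Permute(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (N, C, L) -> (N, L, C); scripted as aten::permute(%x, %dims)
        return x.permute(0, 2, 1)

mod = torch.jit.script(Permute())
print(mod.graph)  # contains aten::permute, handled by the shuffle converter
```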
