Commit e1a2c15

Merge branch 'main' into anurag.dixit/fix_conv1d

2 parents dfbe2e2 + 768f091

142 files changed: +560 -348 lines


.github/workflows/docker_builder.yml
Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ concurrency:
 
 jobs:
   build:
-    runs-on: linux.2xlarge
+    runs-on: linux.4xlarge.nvidia.gpu
 
     # Define key environment variables
     # Container name is of the form torch_tensorrt:<branch_name>

core/conversion/conversionctx/ConversionCtx.cpp
Lines changed: 1 addition & 1 deletion

@@ -164,7 +164,7 @@ void ConversionCtx::RecordNewITensor(const torch::jit::Value* value, nvinfer1::I
 
 std::string ConversionCtx::SerializeEngine() {
 #if NV_TENSORRT_MAJOR > 7
-  auto serialized_network = builder->buildSerializedNetwork(*net, *cfg);
+  auto serialized_network = make_trt(builder->buildSerializedNetwork(*net, *cfg));
   if (!serialized_network) {
     TORCHTRT_THROW_ERROR("Building serialized network failed in TensorRT");
   }
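
The change above swaps the raw nvinfer1::IHostMemory* returned by buildSerializedNetwork for an owning wrapper, so the serialized buffer is released on every exit path. A minimal sketch of what a helper like make_trt can look like, assuming a std::unique_ptr with a TensorRT-aware deleter; the actual helper in the Torch-TensorRT tree may differ in name and details:

#include <memory>

// Hypothetical RAII wrapper in the spirit of make_trt (an assumption, not the
// verbatim Torch-TensorRT helper).
template <typename T>
struct TRTDeleter {
  void operator()(T* obj) const {
    delete obj; // TensorRT 8+ interfaces have virtual destructors; TensorRT 7 used obj->destroy()
  }
};

template <typename T>
using TRTPtr = std::unique_ptr<T, TRTDeleter<T>>;

template <typename T>
TRTPtr<T> make_trt(T* raw) {
  return TRTPtr<T>(raw);
}

Because std::unique_ptr provides operator bool and operator->, the existing if (!serialized_network) check and later ->data()/->size() accesses compile unchanged.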

core/conversion/converters/impl/conv_deconv.cpp
Lines changed: 88 additions & 62 deletions

@@ -10,6 +10,74 @@ namespace converters {
 namespace impl {
 namespace {
 
+void add_output_padding(nvinfer1::Dims& padding, nvinfer1::Dims& out_padding, bool& has_output_padding) {
+  int nbSpatialDims = out_padding.nbDims;
+  // When there is out_padding, if padding is larger than out_padding, just adjust padding Or reduce out_padding as
+  // minimum as possible.
+  for (int i = 0; i < nbSpatialDims; ++i) {
+    if (padding.d[i] - out_padding.d[i] >= 0) {
+      padding.d[i] -= out_padding.d[i];
+      out_padding.d[i] = 0;
+    } else {
+      // Reduce out_padding as possible.
+      out_padding.d[i] -= padding.d[i];
+      padding.d[i] = 0;
+      has_output_padding = true;
+    }
+  }
+}
+
+nvinfer1::ILayer* add_bias_layer(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* input_tensor,
+    nvinfer1::Dims& input_dims,
+    nvinfer1::Dims& output_padding,
+    Weights& bias) {
+  nvinfer1::ITensor* input_shape = ctx->net->addShape(*input_tensor)->getOutput(0);
+  // Add padding layer
+  nvinfer1::ITensor* start;
+  nvinfer1::ITensor* totalPadding;
+  auto in_nbDims = input_dims.nbDims;
+  std::vector<int32_t> startVec(in_nbDims, 0);
+  std::vector<int32_t> totalPaddingVec(in_nbDims, 0);
+  int32_t diff = in_nbDims - output_padding.nbDims;
+  for (int32_t i = diff; i < in_nbDims; i++) {
+    int32_t idx = i - diff;
+    startVec[i] = 0; // Don't need begin padding, only post padding
+    totalPaddingVec[i] = output_padding.d[idx];
+  }
+  start = tensor_to_const(ctx, torch::tensor(startVec, torch::kInt32));
+  totalPadding = tensor_to_const(ctx, torch::tensor(totalPaddingVec, torch::kInt32));
+
+  const auto size =
+      ctx->net->addElementWise(*input_shape, *totalPadding, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
+
+  nvinfer1::Dims stride;
+  stride.nbDims = in_nbDims;
+  for (int64_t i = 0; i < in_nbDims; i++) {
+    stride.d[i] = 1;
+  }
+  const auto& dummy = stride;
+  auto* sliceLayer = ctx->net->addSlice(*input_tensor, dummy, dummy, stride);
+  sliceLayer->setInput(1, *start);
+  sliceLayer->setInput(2, *size);
+  sliceLayer->setMode(nvinfer1::SliceMode::kFILL);
+  nvinfer1::ITensor* slice_output = sliceLayer->getOutput(0);
+
+  nvinfer1::Dims constantDims;
+  constantDims.nbDims = in_nbDims;
+  for (int64_t i = 0; i < in_nbDims; i++) {
+    constantDims.d[i] = 1;
+  }
+  constantDims.d[diff - 1] =
+      bias.shape.d[0]; // Set C dimension to bias dim and other dimensions to 1 to enable broadcast
+  auto const_layer = ctx->net->addConstant(constantDims, bias.data);
+  auto bias_layer =
+      ctx->net->addElementWise(*slice_output, *const_layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+
+  return bias_layer;
+}
+
 bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args) {
   // Input to conv/deconv
   auto in = args[0].ITensor();
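
To see the rebalancing in add_output_padding concretely, here is a standalone walkthrough of the same loop on plain int arrays (a hypothetical driver for illustration, not converter code). Each spatial dimension either folds out_padding entirely into a reduced post-padding, or zeroes the padding and leaves a residual that must later be applied as an explicit fill:

#include <cstdio>

// Same rebalancing as add_output_padding, on plain arrays for illustration.
void rebalance(int* padding, int* out_padding, int n, bool& has_output_padding) {
  for (int i = 0; i < n; ++i) {
    if (padding[i] - out_padding[i] >= 0) {
      padding[i] -= out_padding[i]; // fold out_padding into a smaller post-padding
      out_padding[i] = 0;
    } else {
      out_padding[i] -= padding[i]; // padding exhausted; a residual remains
      padding[i] = 0;
      has_output_padding = true; // residual handled later by a kFILL slice plus bias
    }
  }
}

int main() {
  int pad_a[] = {1}, out_a[] = {1};
  bool residual_a = false;
  rebalance(pad_a, out_a, 1, residual_a);
  std::printf("%d %d %d\n", pad_a[0], out_a[0], residual_a); // 0 0 0: fully folded

  int pad_b[] = {0}, out_b[] = {1};
  bool residual_b = false;
  rebalance(pad_b, out_b, 1, residual_b);
  std::printf("%d %d %d\n", pad_b[0], out_b[0], residual_b); // 0 1 1: fill layer needed
}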
@@ -93,17 +161,31 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
   nvinfer1::ILayer* layer = nullptr;
   nvinfer1::ITensor* out = nullptr;
   if (transposed) {
-    nvinfer1::IDeconvolutionLayer* deconvLayer =
-        ctx->net->addDeconvolutionNd(*in, num_output_maps, filter_dim, kernel_weights, bias.data);
+    // Fix padding based on output_padding provided
+    nvinfer1::Dims begPadding = padding;
+    bool hasOutputPadding = false;
+    add_output_padding(padding, out_padding, hasOutputPadding);
+
+    nvinfer1::IDeconvolutionLayer* deconvLayer = ctx->net->addDeconvolutionNd(
+        *in, num_output_maps, filter_dim, kernel_weights, hasOutputPadding ? nvinfer1::Weights{} : bias.data);
     deconvLayer->setStrideNd(stride);
     deconvLayer->setDilationNd(dilation);
     deconvLayer->setNbGroups(groups);
-    deconvLayer->setPaddingNd(padding);
+    deconvLayer->setPrePadding(begPadding);
+    deconvLayer->setPostPadding(padding);
+
     // Set deconv kernel weights
     deconvLayer->setInput(1, *kernel);
     TORCHTRT_CHECK(deconvLayer, "Unable to create deconv layer with non-const weights from node: " << *n);
     layer = deconvLayer;
-    out = layer->getOutput(0);
+    out = deconvLayer->getOutput(0);
+    if (hasOutputPadding) {
+      LOG_DEBUG("Padding output deconvolution tensor with:" << out_padding);
+      nvinfer1::ITensor* tensorPtr = deconvLayer->getOutput(0);
+      auto dims = in->getDimensions();
+      layer = add_bias_layer(ctx, tensorPtr, dims, out_padding, bias);
+      out = layer->getOutput(0);
+    }
     if (expand_dims) {
       // Un-expand the expanded dimension
       out = addUnpadding(ctx, n, out, original_dim.nbDims);
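
Why the pre/post split reproduces output_padding: by PyTorch's documented ConvTranspose1d shape formula and TensorRT's deconvolution arithmetic (the latter as assumed here), shrinking only the post-padding grows the output by exactly the folded amount:

% PyTorch ConvTranspose1d (documented formula):
L_{\mathrm{out}} = (L_{\mathrm{in}} - 1)\,s - 2p + d\,(k - 1) + \mathit{output\_padding} + 1
% TensorRT deconvolution with split padding (assumed arithmetic):
L_{\mathrm{out}} = (L_{\mathrm{in}} - 1)\,s + d\,(k - 1) + 1 - p_{\mathrm{pre}} - p_{\mathrm{post}}
% Choosing p_{\mathrm{pre}} = p and p_{\mathrm{post}} = p - \mathit{output\_padding} (when non-negative) makes the two agree.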
@@ -178,20 +260,7 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
     // https://github.com/onnx/onnx-tensorrt/blob/c3cfcbc8248c6bd007e6630af2085df5e4834b42/builtin_op_importers.cpp#L734
     nvinfer1::Dims begPadding = padding;
     bool hasOutputPadding = false;
-    int nbSpatialDims = out_padding.nbDims;
-    // When there is out_padding, if padding is larger than out_padding, just adjust padding Or reduce out_padding as
-    // minimum as possible.
-    for (int i = 0; i < nbSpatialDims; ++i) {
-      if (padding.d[i] - out_padding.d[i] >= 0) {
-        padding.d[i] -= out_padding.d[i];
-        out_padding.d[i] = 0;
-      } else {
-        // Reduce out_padding as possible.
-        out_padding.d[i] -= padding.d[i];
-        padding.d[i] = 0;
-        hasOutputPadding = true;
-      }
-    }
+    add_output_padding(padding, out_padding, hasOutputPadding);
 
     // shape of deconvolution's weight: [in, out/groups, ...]
     // If there is still output padding, remove the bias. Bias will be added below.

@@ -213,51 +282,8 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
 #endif
     if (hasOutputPadding) {
       LOG_DEBUG("Padding output deconvolution tensor with:" << out_padding);
-
-      // Add padding layer
-      nvinfer1::ITensor* start;
-      nvinfer1::ITensor* totalPadding;
-      auto in_nbDims = orig_dims.nbDims;
-      std::vector<int32_t> startVec(in_nbDims, 0);
-      std::vector<int32_t> totalPaddingVec(in_nbDims, 0);
-      int32_t diff = in_nbDims - out_padding.nbDims;
-      for (int32_t i = diff; i < in_nbDims; i++) {
-        int32_t idx = i - diff;
-        startVec[i] = 0; // Don't need begin padding, only post padding
-        totalPaddingVec[i] = out_padding.d[idx];
-      }
-      start = tensor_to_const(ctx, torch::tensor(startVec, torch::kInt32));
-      totalPadding = tensor_to_const(ctx, torch::tensor(totalPaddingVec, torch::kInt32));
-
       nvinfer1::ITensor* tensorPtr = deconv->getOutput(0);
-      nvinfer1::ITensor* deconvOutShape = ctx->net->addShape(*tensorPtr)->getOutput(0);
-      const auto size =
-          ctx->net->addElementWise(*deconvOutShape, *totalPadding, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
-
-      nvinfer1::Dims stride;
-      stride.nbDims = in_nbDims;
-      for (int64_t i = 0; i < in_nbDims; i++) {
-        stride.d[i] = 1;
-      }
-      const auto& dummy = stride;
-      auto* sliceLayer = ctx->net->addSlice(*tensorPtr, dummy, dummy, stride);
-      sliceLayer->setInput(1, *start);
-      sliceLayer->setInput(2, *size);
-      sliceLayer->setMode(nvinfer1::SliceMode::kFILL);
-      tensorPtr = sliceLayer->getOutput(0);
-
-      nvinfer1::Dims constantDims;
-      constantDims.nbDims = in_nbDims;
-      for (int64_t i = 0; i < in_nbDims; i++) {
-        constantDims.d[i] = 1;
-      }
-      constantDims.d[diff - 1] =
-          bias.shape.d[0]; // Set C dimension to bias dim and other dimensions to 1 to enable broadcast
-      auto const_layer = ctx->net->addConstant(constantDims, bias.data);
-      auto add_bias_layer =
-          ctx->net->addElementWise(*tensorPtr, *const_layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
-
-      new_layer = add_bias_layer;
+      new_layer = add_bias_layer(ctx, tensorPtr, orig_dims, out_padding, bias);
     } else {
       new_layer = deconv;
     }
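
One design note on both deconvolution paths: whenever a residual out_padding survives the rebalancing, the bias is withheld from the deconvolution itself (nvinfer1::Weights{} above, and the "remove the bias" comment in this path) and re-added by add_bias_layer after the kFILL slice. Since the code sets no explicit fill value on the slice, the appended positions start at the default fill of zero, and the broadcast element-wise sum then leaves them holding exactly the bias; that is why the bias must come after the padding rather than inside the deconvolution layer.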

core/conversion/converters/impl/matrix_multiply.cpp
Lines changed: 34 additions & 5 deletions

@@ -16,20 +16,49 @@ auto mm_registrations TORCHTRT_UNUSED =
        [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
          auto self = args[0].ITensorOrFreeze(ctx);
          auto other = args[1].ITensorOrFreeze(ctx);
+
+         auto selfDims = self->getDimensions().nbDims;
+         auto otherDims = other->getDimensions().nbDims;
+
+         bool squeezeFront = false;
+         bool squeezeBack = false;
+
+         if (selfDims == 1 && selfDims < otherDims) {
+           squeezeFront = true;
+         } else if (otherDims == 1 && otherDims < selfDims) {
+           // Append a 1 to the end of the shape before padding front to match self
+           other = addPadding(ctx, n, other, 2, true, false);
+           otherDims = other->getDimensions().nbDims;
+           squeezeBack = true;
+         }
+
          // Ensure self and other tensors have same nbDims by expanding the dimensions (from 0 axis) if
          // necessary.
-         if (self->getDimensions().nbDims < other->getDimensions().nbDims) {
-           self = addPadding(ctx, n, self, other->getDimensions().nbDims, false, false);
-         } else {
-           other = addPadding(ctx, n, other, self->getDimensions().nbDims, false, false);
+         if (selfDims < otherDims) {
+           self = addPadding(ctx, n, self, otherDims, false, false);
+         } else if (otherDims < selfDims) {
+           other = addPadding(ctx, n, other, selfDims, false, false);
          }
 
          auto mm_layer = ctx->net->addMatrixMultiply(
              *self, nvinfer1::MatrixOperation::kNONE, *other, nvinfer1::MatrixOperation::kNONE);
 
          TORCHTRT_CHECK(mm_layer, "Unable to create matrix multiplication node: " << *n);
          mm_layer->setName(util::node_info(n).c_str());
-         auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], mm_layer->getOutput(0));
+         auto out = mm_layer->getOutput(0);
+
+         if (squeezeFront || squeezeBack) {
+           auto squeezeDimOffset = squeezeFront ? 2 : 1;
+           auto reshapeDims =
+               util::squeezeDims(out->getDimensions(), out->getDimensions().nbDims - squeezeDimOffset);
+           auto shuffle_layer = ctx->net->addShuffle(*out);
+           LOG_DEBUG("Squeezing matmul output for 1d correction: " << reshapeDims);
+           TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+           shuffle_layer->setReshapeDimensions(reshapeDims);
+           shuffle_layer->setName((util::node_info(n) + "_squeeze").c_str());
+           out = shuffle_layer->getOutput(0);
+         }
+         auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out);
 
          LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
          return true;
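
The squeezeFront/squeezeBack bookkeeping mirrors torch.matmul's 1-D rules: a 1-D first operand acts as a row vector whose prepended dimension is dropped from the result, and a 1-D second operand as a column vector whose appended dimension is dropped. A shape-only sketch of that logic (hypothetical helper for illustration, covering just the two 1-D-versus-n-D cases above):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using Shape = std::vector<std::int64_t>;

// Predicts the matmul output shape under the converter's 1-D handling.
Shape matmul_1d_shape(Shape self, Shape other) {
  bool squeezeFront = false, squeezeBack = false;
  if (self.size() == 1 && self.size() < other.size()) {
    squeezeFront = true; // (n) x (..., n, m): prepended dim removed afterwards
  } else if (other.size() == 1 && other.size() < self.size()) {
    other.push_back(1); // append a 1 first, as the converter does
    squeezeBack = true;
  }
  while (self.size() < other.size()) self.insert(self.begin(), 1);
  while (other.size() < self.size()) other.insert(other.begin(), 1);

  const std::size_t rank = self.size();
  Shape out;
  for (std::size_t i = 0; i + 2 < rank; ++i) {
    out.push_back(std::max(self[i], other[i])); // broadcast batch dims
  }
  out.push_back(self[rank - 2]);  // rows from self
  out.push_back(other[rank - 1]); // cols from other

  if (squeezeFront) out.erase(out.end() - 2); // drop the padded row dim
  if (squeezeBack) out.pop_back();            // drop the padded col dim
  return out;
}

int main() {
  assert((matmul_1d_shape({4}, {2, 4, 5}) == Shape{2, 5}));
  assert((matmul_1d_shape({2, 3, 4}, {4}) == Shape{2, 3}));
}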

core/runtime/register_jit_hooks.cpp
Lines changed: 1 addition & 1 deletion

@@ -87,7 +87,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
         .def_pickle(
             [](const c10::intrusive_ptr<TRTEngine>& self) -> std::vector<std::string> {
               // Serialize TensorRT engine
-              auto serialized_trt_engine = self->cuda_engine->serialize();
+              auto serialized_trt_engine = make_trt(self->cuda_engine->serialize());
 
               // Adding device info related meta data to the serialized file
               auto trt_engine = std::string((const char*)serialized_trt_engine->data(), serialized_trt_engine->size());
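
This is the same fix as in ConversionCtx::SerializeEngine() above: the IHostMemory buffer returned by serialize() now sits behind an owning wrapper and is released once the std::string copy has been made, where the bare pointer appears never to have been freed inside this lambda before.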

docs/_cpp_api/classtorch__tensorrt_1_1DataType.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Class DataType &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Class DataType &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 
docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Class Device::DeviceType &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Class Device::DeviceType &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 
docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Class TensorFormat &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Class TensorFormat &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 
docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Template Class Int8CacheCalibrator &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Template Class Int8CacheCalibrator &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 
docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Template Class Int8Calibrator &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Template Class Int8Calibrator &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 
docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Define STR &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Define STR &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 
docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-<title>Define TORCH_TENSORRT_PATCH_VERSION &mdash; Torch-TensorRT v2.2.0.dev0+87fc615 documentation</title>
+<title>Define TORCH_TENSORRT_PATCH_VERSION &mdash; Torch-TensorRT v2.2.0.dev0+d741d2c documentation</title>
 
 
 
@@ -237,7 +237,7 @@
 
 
 <div class="version">
-  v2.2.0.dev0+87fc615
+  v2.2.0.dev0+d741d2c
 </div>
 
 