
Commit da2cbc0

chore: Rebasing to master
Signed-off-by: Dheeraj Peri <[email protected]>
2 parents 5df2536 + 2fbbbc1 commit da2cbc0


121 files changed: +9170 / -2167 lines


README.md

Lines changed: 16 additions & 12 deletions
@@ -18,9 +18,13 @@ More Information / System Architecture:
 #include "trtorch/trtorch.h"

 ...
-auto compile_settings = trtorch::CompileSpec(dims);
+// Set input datatypes. Allowed options torch::{kFloat, kHalf, kChar, kInt32, kBool}
+// Size of input_dtypes should match number of inputs to the network.
+// If input_dtypes is not set, default precision follows traditional PyT / TRT rules
+auto input = trtorch::CompileSpec::Input(dims, torch::kHalf);
+auto compile_settings = trtorch::CompileSpec({input});
 // FP16 execution
-compile_settings.op_precision = torch::kFloat;
+compile_settings.enabled_precisions = {torch::kHalf};
 // Compile module
 auto trt_mod = trtorch::CompileGraph(ts_mod, compile_settings);
 // Run like normal
@@ -36,14 +40,14 @@ import trtorch

 ...
 compile_settings = {
-    "input_shapes": [
-        {
-            "min": [1, 3, 224, 224],
-            "opt": [1, 3, 512, 512],
-            "max": [1, 3, 1024, 1024]
-        }, # For static size [1, 3, 224, 224]
-    ],
-    "op_precision": torch.half # Run with FP16
+    "inputs": [trtorch.Input(
+        min_shape=[1, 3, 224, 224],
+        opt_shape=[1, 3, 512, 512],
+        max_shape=[1, 3, 1024, 1024],
+        # For static size shape=[1, 3, 224, 224]
+        dtype=torch.half, # Datatype of input tensor. Allowed options torch.(float|half|int8|int32|bool)
+    )],
+    "enabled_precision": {torch.half}, # Run with FP16
 }

 trt_ts_module = trtorch.compile(torch_script_module, compile_settings)
@@ -54,9 +58,9 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts")
 ```

 > Notes on running in lower precisions:
-> - Set precision with compile_spec.op_precision
+> - Enable lower precisions with compile_spec.enabled_precisions
 > - The module should be left in FP32 before compilation (FP16 can support half tensor models)
-> - In FP16 only input tensors should be converted to FP16, other precisions use FP32
+> - In FP16 only input tensors by default should be FP16, other precisions use FP32. This can be overridden by setting Input::dtype

 ## Platform Support
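Taken together, the README changes replace the old `op_precision` and `input_shapes` fields with per-input `Input` specs plus a set of enabled kernel precisions. A minimal C++ sketch of the resulting workflow, based only on the API surface shown in this diff (the static-shape `Input` constructor and the exact module types are assumptions):

```cpp
#include "torch/script.h"
#include "trtorch/trtorch.h"

// Sketch: compile a TorchScript module with an explicit FP16 input and FP16 kernels enabled.
// trtorch::CompileSpec::Input(dims, dtype) and enabled_precisions follow the README snippet above.
torch::jit::script::Module CompileHalf(torch::jit::script::Module& ts_mod) {
  auto input = trtorch::CompileSpec::Input({1, 3, 224, 224}, torch::kHalf); // shape + input dtype
  auto compile_settings = trtorch::CompileSpec({input});
  compile_settings.enabled_precisions = {torch::kHalf}; // FP16 kernels allowed in addition to FP32
  return trtorch::CompileGraph(ts_mod, compile_settings);
}
```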

core/compiler.cpp

Lines changed: 46 additions & 6 deletions
@@ -259,12 +259,12 @@ GraphAndMapping ConstructFallbackGraph(
   trt_engine_id << reinterpret_cast<const int*>(&seg_block);

   if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
-    std::vector<ir::InputRange> input_ranges;
+    std::vector<ir::Input> inputs;
     for (auto& shape : seg_block.in_shape()) {
-      input_ranges.push_back(ir::InputRange(shape));
+      inputs.push_back(ir::Input(shape));
     }
     // update the input ranges for each segments
-    convert_cfg.input_ranges = input_ranges;
+    convert_cfg.inputs = inputs;
     auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
     auto temp_g = std::make_shared<torch::jit::Graph>();
     auto device_spec = convert_cfg.engine_settings.device;
@@ -316,11 +316,11 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
   auto named_params = conversion::get_named_params(g->inputs(), params);
   LOG_INFO("(LoweredGraph)\n" << *g);

-  std::unordered_map<torch::jit::Value*, ir::InputRange> input_ranges;
+  std::unordered_map<torch::jit::Value*, ir::Input> inputs;
   for (size_t i = 0; i < g->inputs().size(); ++i) {
-    input_ranges.insert({g->inputs()[i], cfg.convert_info.input_ranges[i]});
+    inputs.insert({g->inputs()[i], cfg.convert_info.inputs[i]});
   }
-  auto input_ivalues_map = partitioning::generateRandomInputs(input_ranges);
+  auto input_ivalues_map = partitioning::generateRandomInputs(inputs);
   auto graph_and_mapping = ConstructFallbackGraph(new_mod, g->block(), input_ivalues_map, cfg, named_params);
   new_g = graph_and_mapping.first;
   LOG_INFO("(FallbackGraph)\n" << *new_g);
@@ -332,6 +332,46 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
   return mod;
 }

+  // <<<<<<< HEAD
+  // =======
+  // std::unordered_map<torch::jit::Value*, torch::jit::Value*> old_to_new_g;
+  // // add global graph's input to old_to_new_g mapping
+  // for (auto input : g->inputs()) {
+  //   util::getOrAddInputForValue(input, new_g, old_to_new_g);
+  // }
+  // for (auto& seg_block : segmented_blocks) {
+  //   std::string cur_block_target =
+  //       seg_block.target() == partitioning::SegmentedBlock::kTensorRT ? "TensorRT" : "Torch";
+  //   LOG_INFO(*seg_block.g() << "(Sub Graph" << cur_block_target << "Block)\n");
+  //   std::ostringstream trt_engine_id;
+  //   trt_engine_id << reinterpret_cast<const int*>(&seg_block);
+  //   if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
+  //     std::vector<ir::Input> inputs;
+  //     for (auto& shape : seg_block.in_shape()) {
+  //       inputs.push_back(ir::Input(shape));
+  //     }
+  //     // update the input ranges for each segments
+  //     convert_cfg.inputs = inputs;
+  //     auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
+  //     auto temp_g = std::make_shared<torch::jit::Graph>();
+  //     auto device_spec = convert_cfg.engine_settings.device;
+  //     auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
+  //     AddEngineToGraph(new_mod, temp_g, engine, cuda_device, trt_engine_id.str(), true);
+  //
+  //     seg_block.update_graph(temp_g);
+  //     AddSegmentedBlockToGraph(new_g, seg_block, old_to_new_g);
+  //   } else {
+  //     AddSegmentedBlockToGraph(new_g, seg_block, old_to_new_g);
+  //   }
+  // }
+  //
+  // for (auto& output : g->outputs()) {
+  //   new_g->registerOutput(old_to_new_g[output]);
+  // }
+  //
+  // LOG_INFO(*new_g << "(FallbackGraph)\n");
+  //
+  // >>>>>>> master
   auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
   auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
   new_mod.type()->addMethod(new_method);

core/compiler.h

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ namespace trtorch {
 namespace core {

 struct CompileSpec {
-  CompileSpec(std::vector<ir::InputRange> input_ranges) : convert_info(std::move(input_ranges)) {}
+  CompileSpec(std::vector<ir::Input> inputs) : convert_info(std::move(inputs)) {}
   conversion::ConversionInfo convert_info;
   partitioning::PartitionInfo partition_info;
 };
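Internally, the same change threads through core::CompileSpec, which now carries ir::Input specs rather than ir::InputRange. A rough sketch of how the internal pieces fit together after the rename (the include path, the namespace alias, and the ir::Input shape constructor are assumptions based on the surrounding diffs):

```cpp
#include <vector>
#include "NvInfer.h"
#include "core/compiler.h" // assumed include path

namespace core = trtorch::core; // alias for brevity

// Sketch: build the internal compile spec from per-input specs and enable FP16 kernels.
core::CompileSpec MakeInternalSpec() {
  std::vector<core::ir::Input> inputs;
  inputs.push_back(core::ir::Input({1, 3, 224, 224})); // static-shape input, mirroring core/compiler.cpp above
  core::CompileSpec spec(inputs);
  // Engine-level settings (precisions, device, etc.) live on convert_info.engine_settings.
  spec.convert_info.engine_settings.enabled_precisions.insert(nvinfer1::DataType::kHALF);
  return spec;
}
```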

core/conversion/conversion.cpp

Lines changed: 43 additions & 20 deletions
@@ -125,10 +125,7 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
           << "please report this error to https://www.github.com/NVIDIA/TRTorch/issues");
 }

-void AddInputs(
-    ConversionCtx* ctx,
-    at::ArrayRef<const torch::jit::Value*> inputs,
-    std::vector<ir::InputRange>& input_dims) {
+void AddInputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> inputs, std::vector<ir::Input>& input_specs) {
   std::vector<const torch::jit::Value*> input_tensors;
   for (auto in : inputs) {
     // Disregarding inputs that are not tensors
@@ -142,29 +139,40 @@ void AddInputs(
     }
   }

+  std::stringstream ss;
+  ss << "Input Dimension Specs: [\n";
+  for (auto i : input_specs) {
+    ss << "    " << i << ",";
+  }
+  ss << ']';
+  LOG_DEBUG(ctx->logger, ss.str());
+
   TRTORCH_CHECK(
-      input_tensors.size() == input_dims.size(),
+      input_tensors.size() == input_specs.size(),
       "Expected dimension specifications for all input tensors"
-          << ", but found " << input_tensors.size() << " input tensors and " << input_dims.size()
+          << ", but found " << input_tensors.size() << " input tensors and " << input_specs.size()
          << " dimension specs (conversion.AddInputs)");

   auto profile = ctx->builder->createOptimizationProfile();

   for (size_t i = 0; i < input_tensors.size(); i++) {
     auto in = input_tensors[i];
-    auto dims = input_dims[i];
+    auto spec = input_specs[i];
     std::string name = std::string("input_") + std::to_string(ctx->num_inputs);
     LOG_INFO(
-        ctx->logger, "Adding Input " << in->debugName() << " named " << name << " in engine (conversion.AddInputs)");
-    LOG_DEBUG(ctx->logger, "Input shape set to " << dims.input_shape);
-    auto trt_in = ctx->net->addInput(name.c_str(), ctx->input_type, dims.input_shape);
+        ctx->logger,
+        "Adding Input " << in->debugName() << " (named: " << name << "): " << spec
+                        << " in engine (conversion.AddInputs)");
+
+    auto trt_in = ctx->net->addInput(name.c_str(), spec.dtype, spec.input_shape);
     TRTORCH_CHECK(trt_in, "Failed to add input node: " << in->debugName() << " (conversion.AddInputs)");
+    trt_in->setAllowedFormats(1U << static_cast<int>(spec.format));

-    profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMIN, dims.min);
-    profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kOPT, dims.opt);
-    profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMAX, dims.max);
+    profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMIN, spec.min);
+    profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kOPT, spec.opt);
+    profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMAX, spec.max);

-    if (dims.input_is_dynamic) {
+    if (spec.input_is_dynamic) {
       ctx->input_is_dynamic = true;
     }

@@ -178,7 +186,7 @@ void AddInputs(

   ctx->cfg->addOptimizationProfile(profile);
 #if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
-  if (ctx->op_precision == nvinfer1::DataType::kINT8) {
+  if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
     ctx->cfg->setCalibrationProfile(profile);
   }
 #endif
@@ -350,7 +358,7 @@ void ConvertBlockToNetDef(

   auto inputs = b->inputs();
   AddParamsToCtxValueMap(ctx, static_params);
-  AddInputs(ctx, inputs, build_info.input_ranges);
+  AddInputs(ctx, inputs, build_info.inputs);

   auto nodes = b->nodes();

@@ -428,8 +436,8 @@ std::string ConvertBlockToEngine(const torch::jit::Block* b, ConversionInfo buil
   return engine;
 }

-std::set<std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
-  std::set<std::string> unsupported_ops;
+std::unordered_map<c10::OperatorName, std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
+  std::unordered_map<c10::OperatorName, std::string> unsupported_ops;
   for (const auto n : b->nodes()) {
     if (n->kind() != torch::jit::prim::Loop && n->kind() != torch::jit::prim::If && !OpSupported(n)) {
       auto schema = n->maybeSchema();
@@ -438,7 +446,7 @@ std::set<std::string> GetUnsupportedOpsInBlock(const torch::jit::Block* b) {
          "Unable to get schema for Node " << util::node_info(n) << " (conversion.VerifyCoverterSupportForBlock)");
       std::stringstream ss;
       ss << *schema;
-      unsupported_ops.insert(ss.str());
+      unsupported_ops[schema->operator_name()] = ss.str();
     }
     for (const auto sub_b : n->blocks()) {
       auto sub_b_unsupported_ops = GetUnsupportedOpsInBlock(sub_b);
@@ -480,12 +488,27 @@ bool VerifyConverterSupportForBlock(const torch::jit::Block* b) {
     unsupported_msg << "Method requested cannot be compiled by TRTorch.\nUnsupported operators listed below:"
                     << std::endl;
     for (auto s : unsupported_ops) {
-      unsupported_msg << "  - " << s << std::endl;
+      unsupported_msg << "  - " << s.second << std::endl;
     }
     unsupported_msg << "You can either implement converters for these ops in your application or request implementation"
                     << std::endl;
     unsupported_msg << "https://www.github.com/nvidia/TRTorch/issues" << std::endl;
+    unsupported_msg << std::endl << "In Module:" << std::endl;
+
     LOG_ERROR(unsupported_msg.str());
+
+    for (const auto n : b->nodes()) {
+      auto schema = n->maybeSchema();
+      if (schema) {
+        for (const auto& x : unsupported_ops) {
+          if (x.first == schema->operator_name()) {
+            LOG_ERROR(
+                "Unsupported operator: " << *schema << std::endl
+                                         << trtorch::core::util::GetPyTorchSourceCode(n) << std::endl);
+          }
+        }
+      }
+    }
     return false;
   }

core/conversion/conversion.h

Lines changed: 2 additions & 3 deletions
@@ -12,10 +12,9 @@ namespace core {
 namespace conversion {

 struct ConversionInfo {
-  std::vector<ir::InputRange> input_ranges;
+  std::vector<ir::Input> inputs;
   BuilderSettings engine_settings;
-  ConversionInfo(std::vector<ir::InputRange> input_ranges)
-      : input_ranges(std::move(input_ranges)), engine_settings(BuilderSettings()) {}
+  ConversionInfo(std::vector<ir::Input> inputs) : inputs(std::move(inputs)), engine_settings(BuilderSettings()) {}
 };

 // TODO: REMOVE GRAPH AND PARAMS AND MOVE FULLY TO INLINED CONSTANTS
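ConversionInfo now carries the ir::Input specs that the reworked conversion::AddInputs (above) turns into TensorRT network inputs, allowed formats, and a single optimization profile. For reference, a standalone sketch of that plain TensorRT pattern (this is not TRTorch code; the function and tensor names are illustrative):

```cpp
#include "NvInfer.h"

// Sketch: register one dynamic-batch input and an optimization profile,
// mirroring what conversion::AddInputs does for each ir::Input spec.
void AddInputWithProfile(nvinfer1::IBuilder* builder,
                         nvinfer1::INetworkDefinition* net,
                         nvinfer1::IBuilderConfig* cfg) {
  // -1 marks the batch dimension as dynamic at network-definition time.
  auto* in = net->addInput("input_0", nvinfer1::DataType::kHALF, nvinfer1::Dims4{-1, 3, 224, 224});

  auto* profile = builder->createOptimizationProfile();
  profile->setDimensions(in->getName(), nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 224, 224});
  profile->setDimensions(in->getName(), nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{8, 3, 224, 224});
  profile->setDimensions(in->getName(), nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{32, 3, 224, 224});
  cfg->addOptimizationProfile(profile);

  // When INT8 is among the enabled precisions, the same profile also serves calibration (TensorRT >= 7.1):
  // cfg->setCalibrationProfile(profile);
}
```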

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 31 additions & 25 deletions
@@ -10,8 +10,11 @@ namespace conversion {
 // clang-format off
 std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
   os << "Settings requested for TensorRT engine:" \
-     << "\n    Operating Precision: " << s.op_precision \
-     << "\n    TF32 Floating Point Computation Enabled: " << !s.disable_tf32 \
+     << "\n    Enabled Precisions: ";
+  for (auto p = s.enabled_precisions.begin(); p != s.enabled_precisions.end(); ++p) {
+    os << *p << ' ';
+  }
+  os << "\n    TF32 Floating Point Computation Enabled: " << !s.disable_tf32 \
      << "\n    Truncate Long and Double: " << s.truncate_long_and_double \
      << "\n    Make Refittable Engine: " << s.refit \
      << "\n    Debuggable Engine: " << s.debug \
@@ -57,30 +60,31 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
   LOG_DEBUG(build_settings);
   cfg = builder->createBuilderConfig();

-  switch (settings.op_precision) {
-    case nvinfer1::DataType::kHALF:
-      TRTORCH_CHECK(builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
-      cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
-      input_type = nvinfer1::DataType::kHALF;
-      break;
-    case nvinfer1::DataType::kINT8:
-      TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
-      cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (!settings.strict_types) {
+  for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) {
+    switch (*p) {
+      case nvinfer1::DataType::kHALF:
+        TRTORCH_CHECK(builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
         cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
-      }
-      input_type = nvinfer1::DataType::kFLOAT;
-      TRTORCH_CHECK(
-          settings.calibrator != nullptr,
-          "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator");
-      cfg->setInt8Calibrator(settings.calibrator);
-      break;
-    case nvinfer1::DataType::kFLOAT:
-    default:
-      input_type = nvinfer1::DataType::kFLOAT;
-      break;
+        break;
+      case nvinfer1::DataType::kINT8:
+        TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
+        cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
+        TRTORCH_CHECK(
+            settings.calibrator != nullptr,
+            "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator");
+        cfg->setInt8Calibrator(settings.calibrator);
+        break;
+      case nvinfer1::DataType::kFLOAT:
+        break;
+      case nvinfer1::DataType::kINT32:
+      case nvinfer1::DataType::kBOOL:
+      default:
+        TRTORCH_THROW_ERROR(
+            "Requested kernel precision that is unsupported: " << *p << " options are float, half, int8");
+    }
   }
-  op_precision = settings.op_precision;
+
+  enabled_precisions = settings.enabled_precisions;

   if (settings.disable_tf32) {
     cfg->clearFlag(nvinfer1::BuilderFlag::kTF32);
@@ -118,7 +122,9 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
         static_cast<int>(settings.device.dla_core) < nbDLACores,
         "Configured DLA Core ID: " << settings.device.dla_core
                                    << " not available. Total number of available DLA Cores: " << nbDLACores);
-    TRTORCH_CHECK(settings.op_precision != nvinfer1::DataType::kFLOAT, "DLA supports only fp16 or int8 precision");
+    TRTORCH_CHECK(
+        settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
+        "DLA supports only fp16 or int8 precision");
     cfg->setDLACore(settings.device.dla_core);
   }
 }
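Because enabled_precisions is now a set, several kernel precisions can be requested at once (for example FP32 plus FP16), and each member simply toggles the corresponding TensorRT builder flag. A small standalone sketch of that mapping against the plain TensorRT API (the helper name is illustrative, not TRTorch code):

```cpp
#include <set>
#include "NvInfer.h"

// Sketch: translate a set of enabled precisions into TensorRT builder flags.
// FP32 kernels are always available; FP16 and INT8 each add a flag.
void ApplyPrecisionFlags(const std::set<nvinfer1::DataType>& enabled_precisions, nvinfer1::IBuilderConfig* cfg) {
  if (enabled_precisions.count(nvinfer1::DataType::kHALF)) {
    cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
  }
  if (enabled_precisions.count(nvinfer1::DataType::kINT8)) {
    cfg->setFlag(nvinfer1::BuilderFlag::kINT8); // also requires a calibrator or explicit dynamic ranges
  }
}
```

For example, `ApplyPrecisionFlags({nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kHALF}, cfg)` leaves FP32 kernels available and additionally allows FP16 kernels, which is what `enabled_precisions = {torch::kHalf}` ultimately requests.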

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 4 additions & 3 deletions
@@ -2,6 +2,7 @@

 #include <map>
 #include <memory>
+#include <set>
 #include <unordered_map>

 #include "NvInfer.h"
@@ -23,7 +24,8 @@ struct Device {
 };

 struct BuilderSettings {
-  nvinfer1::DataType op_precision = nvinfer1::DataType::kFLOAT;
+  std::set<nvinfer1::DataType> enabled_precisions = {nvinfer1::DataType::kFLOAT};
+  std::vector<nvinfer1::DataType> input_dtypes;
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
@@ -57,8 +59,7 @@ struct ConversionCtx {
   nvinfer1::IBuilder* builder;
   nvinfer1::INetworkDefinition* net;
   nvinfer1::IBuilderConfig* cfg;
-  nvinfer1::DataType input_type;
-  nvinfer1::DataType op_precision;
+  std::set<nvinfer1::DataType> enabled_precisions;
   BuilderSettings settings;
   util::logging::TRTorchLogger logger;
   // Pointers to data that needs to remain alive until conversion is done

core/conversion/converters/impl/activation.cpp

Lines changed: 3 additions & 1 deletion
@@ -177,7 +177,9 @@ auto acthardtanh TRTORCH_UNUSED =
            std::string pluginName = "CustomGeluPluginDynamic";
            nvinfer1::PluginFieldCollection fc;
            std::vector<nvinfer1::PluginField> f;
-           int type_id = ctx->settings.op_precision == nvinfer1::DataType::kFLOAT
+           // REVIEW is this right?
+           int type_id = ctx->settings.enabled_precisions.find(nvinfer1::DataType::kHALF) ==
+                   ctx->settings.enabled_precisions.end()
                ? 0
                : 1; // Integer encoding the DataType (0: FP32, 1: FP16)
            f.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));
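The GELU converter now derives the plugin's `type_id` field from the enabled-precision set (0 for FP32, 1 for FP16). A hedged sketch of how such a field feeds the TensorRT plugin registry (the creator version "1", the default plugin namespace, and prior plugin-library registration, e.g. via initLibNvInferPlugins, are assumptions):

```cpp
#include <set>
#include <vector>
#include "NvInfer.h"

// Sketch: pick the GELU plugin's type_id from the enabled precisions and create the plugin.
nvinfer1::IPluginV2* MakeGeluPlugin(const std::set<nvinfer1::DataType>& enabled_precisions) {
  int type_id = enabled_precisions.count(nvinfer1::DataType::kHALF) ? 1 : 0; // 0: FP32, 1: FP16

  std::vector<nvinfer1::PluginField> fields;
  fields.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));

  nvinfer1::PluginFieldCollection fc;
  fc.nbFields = static_cast<int>(fields.size());
  fc.fields = fields.data();

  auto* creator = getPluginRegistry()->getPluginCreator("CustomGeluPluginDynamic", "1");
  return creator ? creator->createPlugin("gelu", &fc) : nullptr;
}
```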
