feat(//core/partitioning) : Dynamic shapes + fallback #1414

Merged · 13 commits · Nov 17, 2022
18 changes: 7 additions & 11 deletions core/compiler.cpp
@@ -137,10 +137,13 @@ partitioning::GraphAndMapping BuildHybridGraph(
auto partitioning_info = cfg.partitioning_info;

auto partitioning_ctx = partitioning::PartitioningCtx(block, partitioning_info);
auto collection_input_ivalues_map =
partitioning::generateRandomInputs(partitioning_info.collection_input_spec_map, first_use_types);
partitioning_ctx.input_types_map = first_use_types;

partitioning::partition(&partitioning_ctx, collection_input_ivalues_map);
// Generate a map from each input torch::jit::Value to its min/opt/max example tensors and store it in the ctx
// TODO: Combine this within the partition call
partitioning::populateInputIValues(&partitioning_ctx);

partitioning::partition(&partitioning_ctx);

for (auto& partitioned_block : partitioning_ctx.partitioned_blocks) {
partitioning::PartitionedGraph& segmented_blocks = partitioned_block.second;
@@ -151,14 +154,7 @@ partitioning::GraphAndMapping BuildHybridGraph(
trt_engine_id << reinterpret_cast<const int*>(&seg_block);

if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
auto shapes = seg_block.in_shapes();
auto types = seg_block.in_types();
std::vector<ir::Input> inputs;
for (size_t i = 0; i < shapes.size(); i++) {
auto in = ir::Input(shapes[i]);
in.dtype = util::ScalarTypeToTRTDataType(types[i]);
inputs.push_back(in);
}
auto inputs = seg_block.construct_inputs_spec();
// update the input ranges for each segment
convert_info.inputs = ir::associate_specs_with_inputs(seg_block.g(), inputs, static_params);

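With this change, BuildHybridGraph no longer assembles ir::Input objects inline; each segment's construct_inputs_spec() decides between static and range specs. For reference, a minimal sketch of the range spec the dynamic path produces, using the existing ir::Input(min, opt, max) constructor; the concrete shape values are illustrative only:

// Range spec for one dynamic input, equivalent to what
// construct_inputs_spec() emits once min/opt/max shapes are recorded.
auto in = ir::Input({1, 3, 224, 224}, {8, 3, 224, 224}, {32, 3, 224, 224});
in.dtype = util::ScalarTypeToTRTDataType(at::kFloat);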
6 changes: 6 additions & 0 deletions core/ir/ir.h
@@ -11,6 +11,12 @@ namespace torch_tensorrt {
namespace core {
namespace ir {

enum class ShapeMode {
kMIN,
kOPT,
kMAX,
};

struct Device {
nvinfer1::DeviceType device_type;
int64_t gpu_id;
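ShapeMode names the three bounds of a TensorRT optimization profile, so a single analysis routine can be run once per bound. A minimal sketch of mapping the mode to a concrete shape; selectShape is a hypothetical helper, and the min/opt/max members are the ones shape_analysis.cpp reads below:

// Hypothetical helper: pick the Dims a pass should materialize.
nvinfer1::Dims selectShape(const ir::Input& in, ir::ShapeMode mode) {
  switch (mode) {
    case ir::ShapeMode::kMIN:
      return in.min;
    case ir::ShapeMode::kOPT:
      return in.opt;
    case ir::ShapeMode::kMAX:
      return in.max;
  }
  return in.opt; // unreachable; silences missing-return warnings
}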
45 changes: 41 additions & 4 deletions core/partitioning/partitioning.cpp
@@ -536,7 +536,35 @@ void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block) {
return;
}

void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map) {
bool isInputDynamic(PartitioningCtx* ctx) {
// Returns true only if every registered input has a dynamic shape
bool input_is_dynamic = true;
auto inputs_map = ctx->settings.collection_input_spec_map;
for (auto inputs : inputs_map) {
for (auto input : inputs.second) {
if (!input.input_is_dynamic) {
input_is_dynamic = false;
}
}
}
return input_is_dynamic;
}

void populateInputIValues(PartitioningCtx* ctx) {
if (isInputDynamic(ctx)) {
ctx->min_input_ivalues_map = partitioning::generateRandomInputs(
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMIN);
ctx->opt_input_ivalues_map = partitioning::generateRandomInputs(
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
ctx->max_input_ivalues_map = partitioning::generateRandomInputs(
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMAX);
} else {
ctx->opt_input_ivalues_map = partitioning::generateRandomInputs(
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
}
}

void partition(PartitioningCtx* ctx) {
LOG_DEBUG(ctx->settings);

// Go through all the blocks to do the partitioning
@@ -546,15 +574,24 @@ void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map) {

// It's possible that some TensorRT blocks have non-Tensor inputs/outputs because they are interleaved with Torch blocks
// resolve non-Tensor inputs/outputs
LOG_DEBUG("Resolving non-tensor inputs for segmented blocks");
resolveTRTNonTensorInputs(ctx, block);

// register input/output torch::jit::Value for segmented graphs
LOG_DEBUG("Registering input/output torch::jit::Value for segmented graphs");
registerSegmentsOutputs(ctx, block);

// run shape analysis on each segmented block
LOG_DEBUG("Running shape analysis for segmented graphs");
runShapeAnalysis(ctx, block, example_tensor_map);
// In case of dynamic-shape inputs, run shape analysis on each segmented block for the min/opt/max ranges and register
// output shapes for each block accordingly
if (isInputDynamic(ctx)) {
LOG_DEBUG("Performing shape analysis for segmented blocks using min/opt/max shapes for inputs");
runShapeAnalysis(ctx, block, ctx->min_input_ivalues_map, ir::ShapeMode::kMIN);
runShapeAnalysis(ctx, block, ctx->opt_input_ivalues_map, ir::ShapeMode::kOPT);
runShapeAnalysis(ctx, block, ctx->max_input_ivalues_map, ir::ShapeMode::kMAX);
} else {
LOG_DEBUG("Performing shape analysis for segmented blocks using static shapes for inputs");
runShapeAnalysis(ctx, block, ctx->opt_input_ivalues_map, ir::ShapeMode::kOPT);
}
}
}

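Note that isInputDynamic() is all-or-nothing: one static input routes the whole graph down the static (kOPT-only) path, so populateInputIValues() fills only opt_input_ivalues_map and partition() runs a single shape-analysis pass. A standalone illustration of that predicate; FakeInput is a stand-in for ir::Input:

#include <vector>

struct FakeInput {
  bool input_is_dynamic; // mirrors the flag checked on ir::Input above
};

bool allDynamic(const std::vector<FakeInput>& inputs) {
  for (const auto& in : inputs) {
    if (!in.input_is_dynamic) {
      return false; // a single static input disables the min/opt/max passes
    }
  }
  return true;
}

// allDynamic({{true}, {false}}) == false -> static path, kOPT only
// allDynamic({{true}, {true}})  == true  -> kMIN, kOPT and kMAX passes run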
15 changes: 12 additions & 3 deletions core/partitioning/partitioning.h
@@ -18,15 +18,24 @@ typedef std::unordered_map<const torch::jit::Value*, torch::jit::IValue> Example
typedef std::pair<std::shared_ptr<torch::jit::Graph>, std::unordered_map<torch::jit::Value*, torch::jit::Value*>>
GraphAndMapping;

ExampleIValues generateRandomInputs(ir::CollectionInputSpecMap& input_ranges, ir::CollectionTypeMap& input_types);
ExampleIValues generateRandomInputs(
ir::CollectionInputSpecMap& input_ranges,
ir::CollectionTypeMap& input_types,
const ir::ShapeMode& shape_mode = ir::ShapeMode::kOPT);

void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& ivalues_maps);
void populateInputIValues(PartitioningCtx* ctx);

void runShapeAnalysis(
PartitioningCtx* ctx,
torch::jit::Block* block,
ExampleIValues& ivalues_maps,
const ir::ShapeMode& shape_mode);

void segmentGraph(PartitioningCtx* ctx, torch::jit::Block* block);

GraphAndMapping stitch(PartitioningCtx* ctx, torch::jit::Block* block);

void partition(PartitioningCtx* ctx, ExampleIValues& example_tensor_map);
void partition(PartitioningCtx* ctx);

} // namespace partitioning
} // namespace core
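Because shape_mode defaults to ir::ShapeMode::kOPT, existing static-shape callers of generateRandomInputs compile unchanged. Hypothetical calls, where spec_map and type_map stand for a CollectionInputSpecMap and a CollectionTypeMap:

// Static-shape caller: the defaulted mode materializes the single shape.
auto opt_ivals = partitioning::generateRandomInputs(spec_map, type_map);

// Dynamic-shape callers select each bound explicitly.
auto min_ivals = partitioning::generateRandomInputs(spec_map, type_map, ir::ShapeMode::kMIN);
auto max_ivals = partitioning::generateRandomInputs(spec_map, type_map, ir::ShapeMode::kMAX);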
4 changes: 4 additions & 0 deletions core/partitioning/partitioningctx/PartitioningCtx.h
@@ -47,6 +47,9 @@ struct UsageInfo {
struct PartitioningCtx {
// TODO: Make the set a part of settings not stand alone
PartitioningInfo settings;
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> min_input_ivalues_map;
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> opt_input_ivalues_map;
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> max_input_ivalues_map;
// records all the original blocks topologically in the module
std::vector<torch::jit::Block*> original_blocks;
// mapping: node=> execution status
@@ -60,6 +63,7 @@
bool shouldNodeRunInTorch(torch::jit::Node* n);
bool shouldNodeRunInTensorRT(torch::jit::Node* n);
std::vector<torch::jit::Node*> getNodesRunInTorch();
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>> input_types_map;

private:
void _load_nodes_into_decision_map(torch::jit::Block* b);
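The three new maps share the layout of the ExampleIValues typedef from partitioning.h, and input_types_map carries the per-Value dtype hints that generateRandomInputs() consumes. A sketch of the seeding order, mirroring BuildHybridGraph in core/compiler.cpp above:

// block, partitioning_info and first_use_types as in BuildHybridGraph.
auto ctx = partitioning::PartitioningCtx(block, partitioning_info);
ctx.input_types_map = first_use_types;    // dtype hint per input Value
partitioning::populateInputIValues(&ctx); // fills min/opt/max ivalue maps
partitioning::partition(&ctx);
// Static-shape graphs populate only opt_input_ivalues_map.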
19 changes: 19 additions & 0 deletions core/partitioning/segmentedblock/SegmentedBlock.cpp
@@ -1,4 +1,5 @@
#include "SegmentedBlock.h"
#include "core/util/prelude.h"

namespace torch_tensorrt {
namespace core {
@@ -56,6 +57,24 @@ torch::jit::Value* SegmentedBlock::getOrAddInputForValue(torch::jit::Value* old_
}
}

std::vector<ir::Input> SegmentedBlock::construct_inputs_spec() const {
std::vector<ir::Input> inputs;
if (min_shapes_.size() == opt_shapes_.size() && opt_shapes_.size() == max_shapes_.size()) {
for (uint64_t i = 0; i < opt_shapes_.size(); i++) {
auto in = ir::Input(min_shapes_[i], opt_shapes_[i], max_shapes_[i]);
in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]);
inputs.push_back(in);
}
} else {
for (uint64_t i = 0; i < opt_shapes_.size(); i++) {
auto in = ir::Input(opt_shapes_[i]);
in.dtype = util::ScalarTypeToTRTDataType(in_types_[i]);
inputs.push_back(in);
}
}
return inputs;
}

torch::jit::Node* SegmentedBlock::cloneNode(torch::jit::Node* node) {
auto* block = g_->block();
auto env = [&](torch::jit::Value* v) { return getOrAddInputForValue(v); };
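construct_inputs_spec() keys on whether all three shape-analysis passes recorded the same number of shapes; when only the opt pass ran (the static path), it falls back to single-shape specs. Usage as in core/compiler.cpp above:

// For a TensorRT-targeted segment, build the conversion input specs.
auto inputs = seg_block.construct_inputs_spec();
// inputs[i] carries a (min, opt, max) range when min_shapes_, opt_shapes_
// and max_shapes_ all match in length, otherwise a static opt-shape spec.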
26 changes: 21 additions & 5 deletions core/partitioning/segmentedblock/SegmentedBlock.h
@@ -35,6 +35,7 @@ struct SegmentedBlock {
SegmentedBlock(BlockID id, SegmentedBlockTarget blk_target, const std::vector<torch::jit::Node*>& nodes);

torch::jit::Value* getOrAddInputForValue(torch::jit::Value* v);
std::vector<ir::Input> construct_inputs_spec() const;
torch::jit::Node* cloneNode(torch::jit::Node* node);
void appendNode(torch::jit::Node* n) {
cloneNode(n);
@@ -72,18 +73,31 @@ struct SegmentedBlock {
bool contain_raw_value(torch::jit::Value* input) const {
return old_to_new_.count(input);
}
void register_inshapes(std::vector<ir::Input>& in_shapes) {
in_shapes_ = in_shapes;
void register_inshapes(std::vector<std::vector<int64_t>>& in_shapes, const ir::ShapeMode& shape_mode) {
if (shape_mode == ir::ShapeMode::kMIN) {
min_shapes_ = in_shapes;
} else if (shape_mode == ir::ShapeMode::kOPT) {
opt_shapes_ = in_shapes;
} else {
max_shapes_ = in_shapes;
}
}
const std::vector<std::vector<int64_t>> in_opt_shapes() const {
return opt_shapes_;
}
const std::vector<ir::Input>& in_shapes() const {
return in_shapes_;
const std::vector<std::vector<int64_t>> in_min_shapes() const {
return min_shapes_;
}
const std::vector<std::vector<int64_t>> in_max_shapes() const {
return max_shapes_;
}
void register_intypes(std::vector<at::ScalarType>& in_types) {
in_types_ = in_types;
}
const std::vector<at::ScalarType>& in_types() const {
return in_types_;
}

void update_id(BlockID new_id) {
id_ = new_id;
}
@@ -107,7 +121,9 @@
private:
BlockID id_;
SegmentedBlockTarget target_;
std::vector<ir::Input> in_shapes_;
std::vector<std::vector<int64_t>> min_shapes_;
std::vector<std::vector<int64_t>> opt_shapes_;
std::vector<std::vector<int64_t>> max_shapes_;
std::vector<at::ScalarType> in_types_;
std::vector<torch::jit::Value*> inputs_;
std::vector<torch::jit::Value*> outputs_;
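Each register_inshapes() call files the shapes observed by one analysis pass under its mode, so after the three dynamic passes the per-bound accessors line up index-for-index. A small sketch with illustrative shape values:

// One analysis pass recording its observed input shapes.
std::vector<std::vector<int64_t>> shapes = {{1, 3, 224, 224}};
seg_block.register_inshapes(shapes, ir::ShapeMode::kMIN);

// Once the kMIN/kOPT/kMAX passes have all run, these return vectors of
// equal length, the exact condition construct_inputs_spec() checks:
auto mins = seg_block.in_min_shapes();
auto opts = seg_block.in_opt_shapes();
auto maxs = seg_block.in_max_shapes();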
45 changes: 30 additions & 15 deletions core/partitioning/shape_analysis.cpp
@@ -10,16 +10,25 @@ namespace torch_tensorrt {
namespace core {
namespace partitioning {

at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>& type_opt) {
auto cur_shape = input.input_shape;
std::vector<int64_t> shape;
at::Tensor generateSingleInput(
ir::Input& input,
c10::optional<at::ScalarType>& type_opt,
const ir::ShapeMode& shape_mode) {
nvinfer1::Dims input_shape = input.input_shape;
if (input.input_is_dynamic) {
if (shape_mode == ir::ShapeMode::kMIN) {
input_shape = input.min;
} else if (shape_mode == ir::ShapeMode::kOPT) {
input_shape = input.opt;
} else {
input_shape = input.max;
}
}

// Initialize min and max ranges for random number selection
int LoValIncl = 0;
int HiValExcl = 2;

shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);

auto type = at::kFloat;
if (type_opt) {
type = type_opt.value();
@@ -29,14 +38,15 @@ at::Tensor generateSingleInput(ir::Input& input, c10::optional<at::ScalarType>&

// Make the value range for input tensor a uniform (float) distribution
// over [LoValIncl, HiValExcl), then cast to the desired dtype
auto in = ((HiValExcl - LoValIncl) * at::rand(shape, {at::kCUDA}) + LoValIncl).to(type);
auto in = ((HiValExcl - LoValIncl) * at::rand(util::toVec(input_shape), {at::kCUDA}) + LoValIncl).to(type);

return in;
}

std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& inputs,
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types) {
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types,
const ir::ShapeMode& shape_mode) {
// generate random inputs for running PyTorch segments
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;

@@ -45,21 +55,21 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
c10::TypePtr elementType = c10::TensorType::get();
auto generic_list = c10::impl::GenericList(elementType);
for (size_t i = 0; i < input.second.size(); i++) {
auto in = generateSingleInput(input.second[i], types[input.first][i]);
auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode);
generic_list.push_back(in.clone());
}
ivalue_map[input.first] = c10::IValue(generic_list);
} else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) {
// create tuple
std::vector<torch::jit::IValue> list;
for (size_t i = 0; i < input.second.size(); i++) {
auto in = generateSingleInput(input.second[i], types[input.first][i]);
auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode);
list.push_back(in.clone());
}
auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr
ivalue_map[input.first] = c10::IValue(tuple);
} else {
auto in = generateSingleInput(input.second[0], types[input.first][0]);
auto in = generateSingleInput(input.second[0], types[input.first][0], shape_mode);
ivalue_map[input.first] = in.clone();
}
}
@@ -124,7 +134,8 @@ torch::jit::Node* createCastNode(SegmentedBlock& seg_block, size_t index, bool i
void getSegmentsOutputByRunning(
SegmentedBlock& seg_block,
std::unordered_map<const torch::jit::Value*, torch::jit::IValue>& ivalues_maps,
const PartitioningInfo& partitioning_info) {
const PartitioningInfo& partitioning_info,
const ir::ShapeMode& shape_mode) {
// create a module to run the graph
auto g = seg_block.g();
auto copy_g = g->copy();
@@ -235,7 +246,7 @@ void getSegmentsOutputByRunning(
}

// set the input shape for each segmented block so we will use it in the conversion process
std::vector<ir::Input> input_shapes;
std::vector<std::vector<int64_t>> input_shapes;
std::vector<at::ScalarType> input_types;
for (size_t i = 0; i < seg_block.inputs().size(); ++i) {
if (ivalues_maps[seg_block.raw_inputs()[i]].isTensor()) {
@@ -270,15 +281,19 @@
// TODO: tuple and list inputs in subgraph
}

seg_block.register_inshapes(input_shapes);
seg_block.register_inshapes(input_shapes, shape_mode);
seg_block.register_intypes(input_types);
}

void runShapeAnalysis(PartitioningCtx* ctx, torch::jit::Block* block, ExampleIValues& example_tensor_map) {
void runShapeAnalysis(
PartitioningCtx* ctx,
torch::jit::Block* block,
ExampleIValues& example_tensor_map,
const ir::ShapeMode& shape_mode) {
// register every segment's input shape, and its running output IValues
for (auto& seg_block : ctx->partitioned_blocks[block]) {
torch::jit::ConstantPooling(seg_block.g());
getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings);
getSegmentsOutputByRunning(seg_block, example_tensor_map, ctx->settings, shape_mode);
}
return;
}
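The example tensors are drawn uniformly from [LoValIncl, HiValExcl) = [0, 2) before the dtype cast, at whichever bound shape_mode selects. A condensed sketch for a dynamic input whose max shape is, hypothetically, [32, 3, 224, 224] with an fp16 dtype hint:

// kMAX pass: materialize one random example tensor at the max bound.
auto t = ((2 - 0) * at::rand({32, 3, 224, 224}, {at::kCUDA}) + 0).to(at::kHalf);
// Values are uniform in [0, 2); integer dtypes truncate to {0, 1}.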