#include "core/partitioning/shape_analysis.h"
#include <ATen/ATen.h>
-#include <inttypes.h>
+#include <cstdint>
#include "core/util/prelude.h"
#include "torch/csrc/jit/api/module.h"
#include "torch/csrc/jit/passes/constant_pooling.h"

namespace torch_tensorrt {
namespace core {
namespace partitioning {

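// Build one random example tensor per top-level graph input so that each
// Torch segment can be executed eagerly; only the shapes and dtypes of these
// tensors matter, not their values.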
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
    std::unordered_map<const torch::jit::Value*, ir::Input>& inputs,
    std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>>& types) {
  // generate random inputs for running PyTorch segments
  std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;

  for (auto& input : inputs) {
    auto cur_shape = input.second.input_shape;
    std::vector<int64_t> shape;
    shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims);
    auto type_opt = types[input.first];
    auto type = at::kFloat;
    if (type_opt) {
      type = type_opt.value();
    } else {
      LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32");
    }
    auto in = at::randint(5, shape, {at::kCUDA}).to(type);
    ivalue_map[input.first] = in.clone();
  }
  return ivalue_map;
}

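// Run a single segmented block eagerly on the example IValues, record its
// outputs so that later segments can find their own inputs, and register the
// observed input shapes/types on the block for the conversion phase.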
void getSegmentsOutputByRunning(
    SegmentedBlock& seg_block,
    std::unordered_map<const torch::jit::Value*, torch::jit::IValue>& ivalues_maps,
    const PartitionInfo& partition_info) {
  // create a module to run the graph
  auto g = seg_block.g();
  auto copy_g = g->copy();

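  // torch::jit::Module::forward returns a single IValue, so a segment with
  // several outputs has them packed into one tuple here and unpacked again
  // after the forward call below.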
  // create tuple for multiple outputs
  if (seg_block.raw_outputs().size() > 1) {
    auto new_output_node = copy_g->appendNode(copy_g->createTuple(copy_g->outputs()));
    for (int idx = copy_g->outputs().size() - 1; idx >= 0; --idx) {
      copy_g->eraseOutput(idx);
    }

    copy_g->registerOutput(new_output_node->outputs()[0]);
  }

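  // A JIT graph is not directly callable, so wrap the copy in a throwaway
  // module: give the graph a leading self argument and register it as that
  // module's forward method with a matching schema.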
  torch::jit::script::Module cur_mod(c10::QualifiedName("module"));

  auto self = copy_g->insertInput(0, "self_1");
  self->setType(cur_mod.type());

  auto cur_method = cur_mod._ivalue()->compilation_unit()->create_function(c10::QualifiedName("forward"), copy_g);
  auto schema = util::GenerateGraphSchema(cur_method->name(), copy_g);
  cur_mod.type()->addMethod(cur_method);
  cur_method->setSchema(schema);

  std::vector<torch::jit::IValue> jit_inputs_ivalues;

  // set input IValues; Tensor/Int/Bool/List/Tuple/Number/Dict/Device are
  // supported for passing arguments between different segments
  for (auto& input : seg_block.raw_inputs()) {
    TORCHTRT_CHECK(
        ivalues_maps.count(input),
        "Could not find torch::jit::Value* " << input->debugName() << " produced from "
                                             << util::node_info(input->node())
                                             << " in lowering graph for mini graph input.\n");
    if (input->node()->kind() == torch::jit::prim::Param) {
      jit_inputs_ivalues.push_back(ivalues_maps[input]);
    } else if (input->type()->isSubtypeOf(torch::jit::TensorType::get())) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toTensor());
    } else if (input->type()->isSubtypeOf(torch::jit::IntType::get())) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toInt());
    } else if (input->type()->isSubtypeOf(torch::jit::BoolType::get())) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toBool());
    } else if (input->type()->kind() == torch::jit::TypeKind::ListType) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toList());
    } else if (input->type()->kind() == torch::jit::TypeKind::TupleType) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple());
    } else if (input->type()->kind() == torch::jit::TypeKind::NumberType) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toScalar());
    } else if (input->type()->kind() == torch::jit::TypeKind::DictType) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toGenericDict());
    } else if (input->type()->kind() == torch::jit::TypeKind::DeviceObjType) {
      jit_inputs_ivalues.push_back(ivalues_maps[input].toDevice());
    } else {
      TORCHTRT_THROW_ERROR(
          "Expected to find type " << input->type()->str() << " for value " << input->debugName()
                                   << " but got nothing.");
    }
  }

  // run the segment to produce the outputs that become later segments' input
  // shapes (and other arguments such as Int)
  std::vector<torch::jit::IValue> jit_results;
  torch::jit::IValue jit_results_ivalues = cur_mod.forward(jit_inputs_ivalues);

  if (jit_results_ivalues.isTuple()) {
    auto results = jit_results_ivalues.toTuple()->elements();
    for (auto r : results) {
      jit_results.push_back(r);
    }
  } else {
    jit_results.push_back(jit_results_ivalues);
  }

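  // Record every output IValue under its Value* so downstream segments (and
  // the shape collection below) can look their inputs up by graph value.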
  size_t idx = 0;
  for (auto& output : seg_block.raw_outputs()) {
    ivalues_maps[output] = jit_results[idx++];
  }

  // set input shape for each segmented block so we will use it in the conversion process
  std::vector<ir::Input> input_shapes;
  std::vector<at::ScalarType> input_types;
  for (auto& i : seg_block.raw_inputs()) {
    if (ivalues_maps[i].isTensor()) {
      // set the input_shape and data_type
      // we can use a temp value here instead of replacing the values in ivalues_map since we only use ivalues_map
      // for shape inference
      auto cur_ivalue = ivalues_maps[i];
      at::ScalarType t = cur_ivalue.toTensor().scalar_type();
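      // TensorRT has no native support for 64-bit tensor types, so kLong and
      // kDouble inputs are either rejected or truncated below, depending on
      // the truncate_long_and_double setting.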
      if (!partition_info.truncate_long_and_double && (t == at::kLong || t == at::kDouble)) {
        TORCHTRT_THROW_ERROR(
            "Unable to process subgraph input type of at::kLong/at::kDouble, try to compile model with truncate_long_and_double enabled");
      } else if (partition_info.truncate_long_and_double && t == at::kLong) {
        cur_ivalue = cur_ivalue.toTensor().to(at::kInt);
        LOG_WARNING("Truncating graph input type from at::kLong to at::kInt");
      } else if (partition_info.truncate_long_and_double && t == at::kDouble) {
        cur_ivalue = cur_ivalue.toTensor().to(at::kFloat);
        LOG_WARNING("Truncating graph input type from at::kDouble to at::kFloat");
      }
      c10::optional<nvinfer1::DataType> dtype = util::optTypeMetaToTRTDataType(cur_ivalue.toTensor().dtype());
      if (dtype == c10::nullopt) {
        TORCHTRT_THROW_ERROR("Unsupported input data type " << cur_ivalue.toTensor().dtype());
      }
      if (cur_ivalue.toTensor().sizes().size() == 0) {
        // handle Scalar types, which have sizes of []
        input_shapes.push_back(util::toVec(util::toDims(c10::List<int64_t>({1}))));
      } else {
        input_shapes.push_back(util::toVec(util::toDims(cur_ivalue.toTensor().sizes())));
      }
      input_types.push_back(cur_ivalue.toTensor().scalar_type());
    }
  }

  seg_block.register_inshapes(input_shapes);
  seg_block.register_intypes(input_types);
}

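// Walk the segments in execution order: pool duplicate constants in each
// sub-graph, then run it eagerly so its outputs feed the shape inference of
// the blocks that follow.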
void runShapeAnalysis(
    std::vector<SegmentedBlock>& segmented_blocks,
    std::unordered_map<const torch::jit::Value*, torch::jit::IValue>& example_tensor_map,
    const PartitionInfo& partition_info) {
  // register every segment's input shapes and its runtime output IValues
  for (auto& seg_block : segmented_blocks) {
    torch::jit::ConstantPooling(seg_block.g());
    getSegmentsOutputByRunning(seg_block, example_tensor_map, partition_info);
  }
}

} // namespace partitioning
} // namespace core
} // namespace torch_tensorrt
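
For reference, a minimal sketch of how a caller might chain these two entry points. The wrapper function below is hypothetical (assumed to live inside torch_tensorrt::core::partitioning) and its argument sources are illustrative, not part of this file's API:

// Hypothetical wrapper, for illustration only.
void exampleShapeAnalysisFlow(
    std::unordered_map<const torch::jit::Value*, ir::Input>& input_specs,
    std::unordered_map<const torch::jit::Value*, c10::optional<at::ScalarType>>& input_dtypes,
    std::vector<SegmentedBlock>& blocks,
    const PartitionInfo& partition_info) {
  // 1. Materialize one random CUDA tensor per top-level graph input.
  auto example_tensors = generateRandomInputs(input_specs, input_dtypes);
  // 2. Run each segment eagerly; afterwards every SegmentedBlock carries the
  //    registered input shapes/types needed by the TensorRT conversion phase.
  runShapeAnalysis(blocks, example_tensors, partition_info);
}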