
WIP: (//core): Added device meta data serialization/deserialization implic… #175


Closed
14 changes: 10 additions & 4 deletions core/compiler.cpp
@@ -1,3 +1,4 @@
+#include <cuda_runtime.h>
 #include <iostream>
 #include <memory>
 #include <sstream>
@@ -46,8 +47,9 @@ c10::FunctionSchema GenerateGraphSchema(
 void AddEngineToGraph(
     torch::jit::script::Module mod,
     std::shared_ptr<torch::jit::Graph>& g,
-    std::string& serialized_engine) {
-  auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name(), serialized_engine);
+    std::string& engine,
+    runtime::CudaDevice& device_info) {
+  auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name(), engine, device_info);
   // Get required metadata about the engine out
   auto num_io = engine_ptr->num_io;
   auto name = engine_ptr->name;
@@ -157,12 +159,16 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
   // torch::jit::script::Module new_mod = mod.clone();
   torch::jit::script::Module new_mod(mod._ivalue()->name() + "_trt");
   std::vector<std::shared_ptr<torch::jit::Graph>> graphs;
+
   for (const torch::jit::script::Method& method : mod.get_methods()) {
     // Don't convert hidden methods
     if (method.name().rfind("_", 0)) {
       auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
       auto new_g = std::make_shared<torch::jit::Graph>();
-      AddEngineToGraph(new_mod, new_g, engine);
+
+      auto device_spec = cfg.convert_info.engine_settings.device;
+      auto cuda_device = runtime::get_device_info(device_spec.gpu_id, device_spec.device_type);
+      AddEngineToGraph(new_mod, new_g, engine, cuda_device);
       auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
       auto schema = GenerateGraphSchema(new_mod, new_method->name(), new_g);
       new_mod.type()->addMethod(new_method);
@@ -174,7 +180,7 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
 }
 
 void set_device(const int gpu_id) {
-  TRTORCH_ASSERT(cudaSetDevice(gpu_id) == cudaSuccess, "Unable to set CUDA device: " << gpu_id);
+  TRTORCH_CHECK((cudaSetDevice(gpu_id) == cudaSuccess), "Unable to set CUDA device: " << gpu_id);
Collaborator comment on the line above: can we call runtime::set_cuda_device here just to centralize responsibility for device management in the runtime section? (A sketch of this suggestion follows this file's diff.)

 }
 
 } // namespace core
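
The collaborator comment above asks for device switching to go through the runtime rather than calling cudaSetDevice directly in core. A minimal sketch of that shape, assuming the runtime exposes a set_cuda_device(CudaDevice&) helper as the comment implies (the helper's exact name and signature are not shown in this diff; get_device_info is reused from the hunk above, with kGPU assumed for the device type):

// Sketch only, not part of this PR.
void set_device(const int gpu_id) {
  // Build the runtime's device descriptor for the requested GPU (device type assumed to be kGPU)
  auto device_info = runtime::get_device_info(gpu_id, nvinfer1::DeviceType::kGPU);
  // Let the runtime own the cudaSetDevice call and any related bookkeeping
  runtime::set_cuda_device(device_info);
}
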
79 changes: 57 additions & 22 deletions core/conversion/converters/impl/element_wise.cpp
@@ -68,6 +68,21 @@ nvinfer1::ILayer* add_elementwise(
   return ele;
 }
 
+nvinfer1::ITensor* clamp_util(
+    ConversionCtx* ctx,
+    const torch::jit::Node* n,
+    nvinfer1::ITensor* self,
+    float limit,
+    nvinfer1::ElementWiseOperation op_type,
+    std::string str) {
+  nvinfer1::ITensor* clamp_layer_out = self;
+  auto limitTensor = tensor_to_const(ctx, torch::tensor({limit}));
+  auto limit_layer = add_elementwise(ctx, op_type, clamp_layer_out, limitTensor, util::node_info(n) + str);
+  TRTORCH_CHECK(limit_layer, "Unable to create elementwise " << str << " layer for node: " << *n);
+  clamp_layer_out = limit_layer->getOutput(0);
+  return clamp_layer_out;
+}
+
 auto element_wise_registrations TRTORCH_UNUSED =
     RegisterNodeConversionPatterns()
         .pattern({"aten::add.Tensor(Tensor self, Tensor other, Scalar alpha=1) -> "
@@ -145,38 +160,58 @@ auto element_wise_registrations TRTORCH_UNUSED =
                     return true;
                   }})
         .pattern({"aten::clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> (Tensor)",
                   [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                     // Compute min(max(min_threshold, input), max_threshold)
                     auto self = args[0].ITensorOrFreeze(ctx);
                     auto clamp_layer_out = self;
+
+                    if (args[1].isIValue() && args[1].IValue()->isScalar() && args[2].isIValue() &&
+                        args[2].IValue()->isScalar()) {
+                      auto alpha = args[1].unwrapToScalar().to<float>();
+                      auto beta = args[2].unwrapToScalar().to<float>();
+                      auto clip_layer = ctx->net->addActivation(*self, nvinfer1::ActivationType::kCLIP);
+                      TRTORCH_CHECK(clip_layer, "Unable to create clip layer for node: " << *n);
+                      clip_layer->setAlpha(alpha);
+                      clip_layer->setBeta(beta);
+                      clamp_layer_out = clip_layer->getOutput(0);
+                    } else if (args[1].isIValue() && args[1].IValue()->isScalar()) {
+                      auto limit = args[1].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMAX, "_max");
+                    } else if (args[2].isIValue() && args[2].IValue()->isScalar()) {
+                      auto limit = args[2].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMIN, "_min");
+                    }
+
+                    auto out = ctx->AssociateValueAndTensor(n->outputs()[0], clamp_layer_out);
+                    LOG_DEBUG("Clamp layer output tensor shape: " << out->getDimensions());
+                    return true;
+                  }})
+        .pattern({"aten::clamp_min(Tensor self, Scalar min) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    // Compute min(max(min_threshold, input), max_threshold)
+                    auto self = args[0].ITensorOrFreeze(ctx);
+                    auto clamp_layer_out = self;
                     if (args[1].isIValue() && args[1].IValue()->isScalar()) {
-                      auto minScalar = args[1].unwrapToScalar().to<float>();
-                      auto minTensor = tensor_to_const(ctx, torch::tensor({minScalar}));
-                      auto max_layer = add_elementwise(
-                          ctx,
-                          nvinfer1::ElementWiseOperation::kMAX,
-                          clamp_layer_out,
-                          minTensor,
-                          util::node_info(n) + std::string("_max"));
-                      TRTORCH_CHECK(max_layer, "Unable to create elementwise max layer for node: " << *n);
-                      clamp_layer_out = max_layer->getOutput(0);
+                      auto limit = args[1].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMAX, "_max");
                     }
 
-                    if (args[2].isIValue() && args[2].IValue()->isScalar()) {
-                      auto maxScalar = args[2].unwrapToScalar().to<float>();
-                      auto maxTensor = tensor_to_const(ctx, torch::tensor({maxScalar}));
-                      auto min_layer = add_elementwise(
-                          ctx,
-                          nvinfer1::ElementWiseOperation::kMIN,
-                          clamp_layer_out,
-                          maxTensor,
-                          util::node_info(n) + std::string("_min"));
-                      TRTORCH_CHECK(min_layer, "Unable to create elementwise min layer for node: " << *n);
-                      clamp_layer_out = min_layer->getOutput(0);
+                    auto out = ctx->AssociateValueAndTensor(n->outputs()[0], clamp_layer_out);
+                    LOG_DEBUG("clamp_min layer output tensor shape: " << out->getDimensions());
+                    return true;
+                  }})
+        .pattern({"aten::clamp_max(Tensor self, Scalar max) -> (Tensor)",
+                  [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+                    // Compute min(max(min_threshold, input), max_threshold)
+                    auto self = args[0].ITensorOrFreeze(ctx);
+                    auto clamp_layer_out = self;
+                    if (args[1].isIValue() && args[1].IValue()->isScalar()) {
+                      auto limit = args[1].unwrapToScalar().to<float>();
+                      clamp_layer_out = clamp_util(ctx, n, self, limit, nvinfer1::ElementWiseOperation::kMIN, "_min");
                     }
 
                     auto out = ctx->AssociateValueAndTensor(n->outputs()[0], clamp_layer_out);
-                    LOG_DEBUG("Clamp layer output tensor shape: " << out->getDimensions());
+                    LOG_DEBUG("clamp_max layer output tensor shape: " << out->getDimensions());
                     return true;
                   }})
         .pattern({"aten::sub.Tensor(Tensor self, Tensor other, Scalar alpha=1) -> "
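
For reference, all three clamp converters above implement the scalar-bound semantics spelled out in the in-code comment: clamp(x, min, max) = min(max(x, min), max), with clamp_min and clamp_max applying only one bound. A standalone sanity check of that arithmetic (plain C++, independent of TensorRT and not part of this PR):

#include <algorithm>
#include <cassert>

// Reference semantics for the clamp family converted above:
// min(max(min_threshold, input), max_threshold)
float clamp_ref(float x, float lo, float hi) {
  return std::min(std::max(x, lo), hi);
}

int main() {
  assert(clamp_ref(5.0f, 0.0f, 1.0f) == 1.0f);   // above the range -> clipped to max
  assert(clamp_ref(-3.0f, 0.0f, 1.0f) == 0.0f);  // below the range -> clipped to min
  assert(clamp_ref(0.5f, 0.0f, 1.0f) == 0.5f);   // inside the range -> unchanged
  return 0;
}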