Skip to content

Commit 52152d0

Browse files
gs-olivebowang007
authored and committed
fix: Bugfix in shape analysis for multi-GPU systems
- Shape analysis code in partitioning defaults dry-run tensors to cuda:0 despite user-specified devices - This leads to errors about device casting for internal tensors, which users cannot cast - Add GPU-ID function arguments in functions to generate new tensors on the user-specified (or default) device
1 parent bd27e9d commit 52152d0

File tree

3 files changed

+27
-11
lines changed

3 files changed

+27
-11
lines changed

core/partitioning/partitioning.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -542,14 +542,26 @@ bool isInputDynamic(PartitioningCtx* ctx) {
542542
void populateInputIValues(PartitioningCtx* ctx) {
543543
if (isInputDynamic(ctx)) {
544544
ctx->min_input_ivalues_map = partitioning::generateRandomInputs(
545-
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMIN);
545+
ctx->settings.collection_input_spec_map,
546+
ctx->input_types_map,
547+
ir::ShapeMode::kMIN,
548+
ctx->settings.target_device.gpu_id);
546549
ctx->opt_input_ivalues_map = partitioning::generateRandomInputs(
547-
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
550+
ctx->settings.collection_input_spec_map,
551+
ctx->input_types_map,
552+
ir::ShapeMode::kOPT,
553+
ctx->settings.target_device.gpu_id);
548554
ctx->max_input_ivalues_map = partitioning::generateRandomInputs(
549-
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kMAX);
555+
ctx->settings.collection_input_spec_map,
556+
ctx->input_types_map,
557+
ir::ShapeMode::kMAX,
558+
ctx->settings.target_device.gpu_id);
550559
} else {
551560
ctx->opt_input_ivalues_map = partitioning::generateRandomInputs(
552-
ctx->settings.collection_input_spec_map, ctx->input_types_map, ir::ShapeMode::kOPT);
561+
ctx->settings.collection_input_spec_map,
562+
ctx->input_types_map,
563+
ir::ShapeMode::kOPT,
564+
ctx->settings.target_device.gpu_id);
553565
}
554566
}
555567

core/partitioning/partitioning.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ const std::unordered_set<c10::Symbol> CollectionNodeKinds = {
3434
ExampleIValues generateRandomInputs(
3535
ir::CollectionInputSpecMap& input_ranges,
3636
ir::CollectionTypeMap& input_types,
37-
const ir::ShapeMode& shape_mode = ir::ShapeMode::kOPT);
37+
const ir::ShapeMode& shape_mode = ir::ShapeMode::kOPT,
38+
int64_t gpu_id = 0);
3839

3940
void populateInputIValues(PartitioningCtx* ctx);
4041

core/partitioning/shape_analysis.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ namespace partitioning {
1313
at::Tensor generateSingleInput(
1414
ir::Input& input,
1515
c10::optional<at::ScalarType>& type_opt,
16-
const ir::ShapeMode& shape_mode) {
16+
const ir::ShapeMode& shape_mode,
17+
int64_t gpu_id) {
1718
nvinfer1::Dims input_shape = input.input_shape;
1819
if (input.input_is_dynamic) {
1920
if (shape_mode == ir::ShapeMode::kMIN) {
@@ -42,15 +43,17 @@ at::Tensor generateSingleInput(
4243

4344
// Make the value range for input tensor a uniform (float) distribution
4445
// over [LoValIncl, HiValExcl), then cast to the desired dtype
45-
auto in = ((HiValExcl - LoValIncl) * at::rand(util::toVec(input_shape), {at::kCUDA}) + LoValIncl).to(type);
46+
auto in = ((HiValExcl - LoValIncl) * at::rand(util::toVec(input_shape)) + LoValIncl)
47+
.to(at::Device(at::kCUDA, gpu_id), type);
4648

4749
return in;
4850
}
4951

5052
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomInputs(
5153
std::unordered_map<const torch::jit::Value*, std::vector<ir::Input>>& inputs,
5254
std::unordered_map<const torch::jit::Value*, std::vector<c10::optional<at::ScalarType>>>& types,
53-
const ir::ShapeMode& shape_mode) {
55+
const ir::ShapeMode& shape_mode,
56+
int64_t gpu_id) {
5457
// generate random inputs for running pytorch segments
5558
std::unordered_map<const torch::jit::Value*, torch::jit::IValue> ivalue_map;
5659

@@ -59,21 +62,21 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
5962
c10::TypePtr elementType = c10::TensorType::get();
6063
auto generic_list = c10::impl::GenericList(elementType);
6164
for (size_t i = 0; i < input.second.size(); i++) {
62-
auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode);
65+
auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode, gpu_id);
6366
generic_list.push_back(in.clone());
6467
}
6568
ivalue_map[input.first] = c10::IValue(generic_list);
6669
} else if (input.first->type()->kind() == torch::jit::TypeKind::TupleType) {
6770
// create tuple
6871
std::vector<torch::jit::IValue> list;
6972
for (size_t i = 0; i < input.second.size(); i++) {
70-
auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode);
73+
auto in = generateSingleInput(input.second[i], types[input.first][i], shape_mode, gpu_id);
7174
list.push_back(in.clone());
7275
}
7376
auto tuple = c10::ivalue::Tuple::create(list); // create tuple ptr
7477
ivalue_map[input.first] = c10::IValue(tuple);
7578
} else {
76-
auto in = generateSingleInput(input.second[0], types[input.first][0], shape_mode);
79+
auto in = generateSingleInput(input.second[0], types[input.first][0], shape_mode, gpu_id);
7780
ivalue_map[input.first] = in.clone();
7881
}
7982
}

0 commit comments

Comments
 (0)