Skip to content

Commit 0e96f1f

Browse files
committed
CONSOLIDATED COMMITS: Enable tensorpipe with hip_basic backend
============================================================== Enable tensorpipe with hip_basic backend (#1135) * Add hip_basic tensorpipe support to PyTorch * Enabling hip_basic for Tensorpipe for PyTorch * removing upstream tensorpipe module * Adding ROCm specific tensorpipe submodule * tensorpipe submodule updated * Update the hip invalid device string * Added ignore for tensorpipe git submodule * Moved include of tensorpipe_cuda.h to hipify * Updates based on review comments * Defining the variable __HIP_PLATFORM_AMD__ * Enabling the UTs Co-authored-by: Ronak Malik <[email protected]> Update tensorpipe submodule to support ROCm 6.0
1 parent b966e44 commit 0e96f1f

File tree

6 files changed

+23
-11
lines changed

6 files changed

+23
-11
lines changed

.gitmodules

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,6 @@
8686
ignore = dirty
8787
path = third_party/fmt
8888
url = https://github.com/fmtlib/fmt.git
89-
[submodule "third_party/tensorpipe"]
90-
ignore = dirty
91-
path = third_party/tensorpipe
92-
url = https://github.com/pytorch/tensorpipe.git
9389
[submodule "third_party/cudnn_frontend"]
9490
path = third_party/cudnn_frontend
9591
url = https://github.com/NVIDIA/cudnn-frontend.git
@@ -134,3 +130,8 @@
134130
[submodule "third_party/kleidiai"]
135131
path = third_party/kleidiai
136132
url = https://git.gitlab.arm.com/kleidi/kleidiai.git
133+
[submodule "third_party/tensorpipe"]
134+
ignore = dirty
135+
path = third_party/tensorpipe
136+
url = https://github.com/ROCmSoftwarePlatform/tensorpipe.git
137+
branch = tp_rocm_60

cmake/Dependencies.cmake

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,6 +1144,14 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
11441144
set(TP_USE_CUDA ON CACHE BOOL "" FORCE)
11451145
set(TP_ENABLE_CUDA_IPC ON CACHE BOOL "" FORCE)
11461146
endif()
1147+
if(USE_ROCM)
1148+
add_compile_options(-D__HIP_PLATFORM_AMD__=1)
1149+
set(TP_USE_ROCM ON CACHE BOOL "" FORCE)
1150+
set(TP_ENABLE_HIP_IPC OFF CACHE BOOL "" FORCE)
1151+
set(TP_ENABLE_HIP_XTH OFF CACHE BOOL "" FORCE)
1152+
set(TP_ENABLE_HIP_GDR OFF CACHE BOOL "" FORCE)
1153+
set(TP_ENABLE_IBV OFF CACHE BOOL "" FORCE)
1154+
endif()
11471155
set(TP_BUILD_LIBUV ON CACHE BOOL "" FORCE)
11481156
add_compile_options(-DTORCH_USE_LIBUV)
11491157
include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/tensorpipe/third_party/libuv/include)
@@ -1158,9 +1166,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
11581166
if(USE_CUDA)
11591167
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS tensorpipe_cuda)
11601168
elseif(USE_ROCM)
1161-
message(WARNING "TensorPipe doesn't yet support ROCm")
1169+
message(WARNING "TensorPipe is supported on ROCm")
11621170
# Not yet...
1163-
# list(APPEND Caffe2_HIP_DEPENDENCY_LIBS tensorpipe_hip)
1171+
list(APPEND Caffe2_HIP_DEPENDENCY_LIBS tensorpipe_hip)
11641172
endif()
11651173
endif()
11661174
endif()

test/run_test.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,7 @@ def __contains__(self, item):
166166
] + FSDP_TEST
167167

168168
ROCM_BLOCKLIST = [
169-
"distributed/rpc/test_faulty_agent",
170-
"distributed/rpc/test_tensorpipe_agent",
171169
"distributed/rpc/test_share_memory",
172-
"distributed/rpc/cuda/test_tensorpipe_agent",
173170
"distributed/_shard/checkpoint/test_checkpoint"
174171
"distributed/_shard/checkpoint/test_file_system_checkpoint"
175172
"distributed/_shard/sharding_spec/test_sharding_spec",

third_party/tensorpipe

torch/csrc/distributed/rpc/tensorpipe_cuda.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <torch/csrc/distributed/rpc/tensorpipe_agent.h>
22
#include <torch/csrc/distributed/rpc/tensorpipe_utils.h>
33

4-
#if defined(USE_TENSORPIPE) && !defined(USE_ROCM)
4+
#if defined(USE_TENSORPIPE)
55

66
#include <c10/cuda/CUDACachingAllocator.h>
77
#include <c10/cuda/CUDAGuard.h>
@@ -48,6 +48,8 @@ C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel);
4848

4949
#endif
5050

51+
#if TENSORPIPE_HAS_CUDA_XTH_CHANNEL
52+
5153
std::unique_ptr<ChannelRegistration> makeCudaXthChannel() {
5254
auto context = tensorpipe::channel::cuda_xth::create();
5355
return std::make_unique<ChannelRegistration>(
@@ -57,6 +59,8 @@ std::unique_ptr<ChannelRegistration> makeCudaXthChannel() {
5759
// The cuda_xth channel supports same-process GPU-to-GPU comm
5860
C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel);
5961

62+
#endif
63+
6064
std::unique_ptr<ChannelRegistration> makeCudaBasicChannel() {
6165
auto context = tensorpipe::channel::cuda_basic::create(
6266
tensorpipe::channel::basic::create());

torch/testing/_internal/distributed/rpc/rpc_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
skip_if_lt_x_gpu,
3333
captured_output,
3434
tp_transports,
35+
skip_if_rocm,
3536
)
3637
from torch.testing._internal.common_utils import (
3738
IS_MACOS,
@@ -5054,6 +5055,7 @@ def test_dynamic_rpc_existing_rank_can_communicate_with_new_rank(self):
50545055

50555056
# Dynamic RPC existing ranks can communicate with new ranks using CUDA rpc
50565057
@skip_if_lt_x_gpu(2)
5058+
@skip_if_rocm
50575059
@dist_init(setup_rpc=False)
50585060
def test_dynamic_rpc_existing_rank_can_communicate_with_new_rank_cuda(self):
50595061
initialize_pg(self.file_init_method, self.rank, self.world_size)

0 commit comments

Comments
 (0)