Skip to content

Commit 9cc8aab

Browse files
pruthvistony authored and dnikolaev-amd committed
CONSOLIDATED COMMITS: Enable tensorpipe with hip_basic backend
============================================================== Enable tensorpipe with hip_basic backend (#1135) * Add hip_basic tensorpipe support to PyTorch * Enabling hip_basic for Tensorpipe for PyTorch * removing upstream tensorpipe module * Adding ROCm specific tensorpipe submodule * tensorpipe submodule updated * Update the hip invalid device string * Added ignore for tensorpipe git submodule * Moved include of tensorpipe_cuda.h to hipify * Updates based on review comments * Defining the variable __HIP_PLATFORM_AMD__ * Enabling the UTs Co-authored-by: Ronak Malik <[email protected]> Update tensorpipe submodule to support ROCm 6.0 (cherry picked from commit 0e96f1f)
1 parent 79e5d25 commit 9cc8aab

File tree

5 files changed

+23
-8
lines changed

5 files changed

+23
-8
lines changed

.gitmodules

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,6 @@
8282
ignore = dirty
8383
path = third_party/fmt
8484
url = https://github.com/fmtlib/fmt.git
85-
[submodule "third_party/tensorpipe"]
86-
ignore = dirty
87-
path = third_party/tensorpipe
88-
url = https://github.com/pytorch/tensorpipe.git
8985
[submodule "third_party/cudnn_frontend"]
9086
path = third_party/cudnn_frontend
9187
url = https://github.com/NVIDIA/cudnn-frontend.git
@@ -133,3 +129,8 @@
133129
[submodule "third_party/flash-attention"]
134130
path = third_party/flash-attention
135131
url = https://github.com/Dao-AILab/flash-attention.git
132+
[submodule "third_party/tensorpipe"]
133+
ignore = dirty
134+
path = third_party/tensorpipe
135+
url = https://github.com/ROCmSoftwarePlatform/tensorpipe.git
136+
branch = tp_rocm_60

cmake/Dependencies.cmake

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,14 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
11651165
set(TP_USE_CUDA ON CACHE BOOL "" FORCE)
11661166
set(TP_ENABLE_CUDA_IPC ON CACHE BOOL "" FORCE)
11671167
endif()
1168+
if(USE_ROCM)
1169+
add_compile_options(-D__HIP_PLATFORM_AMD__=1)
1170+
set(TP_USE_ROCM ON CACHE BOOL "" FORCE)
1171+
set(TP_ENABLE_HIP_IPC OFF CACHE BOOL "" FORCE)
1172+
set(TP_ENABLE_HIP_XTH OFF CACHE BOOL "" FORCE)
1173+
set(TP_ENABLE_HIP_GDR OFF CACHE BOOL "" FORCE)
1174+
set(TP_ENABLE_IBV OFF CACHE BOOL "" FORCE)
1175+
endif()
11681176
set(TP_BUILD_LIBUV ON CACHE BOOL "" FORCE)
11691177
add_compile_options(-DTORCH_USE_LIBUV)
11701178
include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/tensorpipe/third_party/libuv/include)
@@ -1186,9 +1194,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
11861194
if(USE_CUDA)
11871195
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS tensorpipe_cuda)
11881196
elseif(USE_ROCM)
1189-
message(WARNING "TensorPipe doesn't yet support ROCm")
1197+
message(WARNING "TensorPipe is supported on ROCm")
11901198
# Not yet...
1191-
# list(APPEND Caffe2_HIP_DEPENDENCY_LIBS tensorpipe_hip)
1199+
list(APPEND Caffe2_HIP_DEPENDENCY_LIBS tensorpipe_hip)
11921200
endif()
11931201
endif()
11941202
endif()

third_party/tensorpipe

torch/csrc/distributed/rpc/tensorpipe_cuda.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <torch/csrc/distributed/rpc/tensorpipe_agent.h>
22
#include <torch/csrc/distributed/rpc/tensorpipe_utils.h>
33

4-
#if defined(USE_TENSORPIPE) && !defined(USE_ROCM)
4+
#if defined(USE_TENSORPIPE)
55

66
#include <c10/cuda/CUDACachingAllocator.h>
77
#include <c10/cuda/CUDAGuard.h>
@@ -48,6 +48,8 @@ C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel)
4848

4949
#endif
5050

51+
#if TENSORPIPE_HAS_CUDA_XTH_CHANNEL
52+
5153
std::unique_ptr<ChannelRegistration> makeCudaXthChannel() {
5254
auto context = tensorpipe::channel::cuda_xth::create();
5355
return std::make_unique<ChannelRegistration>(
@@ -57,6 +59,8 @@ std::unique_ptr<ChannelRegistration> makeCudaXthChannel() {
5759
// The cuda_xth channel supports same-process GPU-to-GPU comm
5860
C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel)
5961

62+
#endif
63+
6064
std::unique_ptr<ChannelRegistration> makeCudaBasicChannel() {
6165
auto context = tensorpipe::channel::cuda_basic::create(
6266
tensorpipe::channel::basic::create());

torch/testing/_internal/distributed/rpc/rpc_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
skip_if_lt_x_gpu,
3333
captured_output,
3434
tp_transports,
35+
skip_if_rocm,
3536
)
3637
from torch.testing._internal.common_utils import (
3738
IS_MACOS,
@@ -5052,6 +5053,7 @@ def test_dynamic_rpc_existing_rank_can_communicate_with_new_rank(self):
50525053

50535054
# Dynamic RPC existing ranks can communicate with new ranks using CUDA rpc
50545055
@skip_if_lt_x_gpu(2)
5056+
@skip_if_rocm
50555057
@dist_init(setup_rpc=False)
50565058
def test_dynamic_rpc_existing_rank_can_communicate_with_new_rank_cuda(self):
50575059
initialize_pg(self.file_init_method, self.rank, self.world_size)

0 commit comments

Comments
 (0)