Skip to content

Commit 618c54e

Browse files
committed
gpt-fast running now with intra-node comm
1 parent 48d1c33 commit 618c54e

File tree

2 files changed

+2
-1
lines changed

2 files changed

+2
-1
lines changed

torch/csrc/distributed/c10d/intra_node_comm.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@ bool IntraNodeComm::rendezvous() {
330330
auto ret = rsmi_init(0);
331331
if (ret != RSMI_STATUS_SUCCESS) {
332332
LOG(ERROR) << "IntraNodeComm:: rendezvous failed in rsmi_init, ret=" << ret;
333-
return nullptr;
333+
return false;
334334
}
335335
#endif
336336

torch/csrc/distributed/c10d/intra_node_comm.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ DEVICE_INLINE void releaseSignal(uint32_t* addr) {
120120
CUDA_KERNEL_ASSERT(false);
121121
#else
122122
atomicAdd_system(addr, 1);
123+
__threadfence_system();
123124
#endif
124125
}
125126

0 commit comments

Comments
 (0)