Skip to content

Commit efd3dad

Browse files
committed
[Nested Tensor] use at::cuda::getCurrentCUDAStream(), not getDefaultCUDAStream()
Otherwise, the add/remove padding kernels are launched on the default stream and do not synchronize with work queued on the current stream, resulting in flaky unit tests in test_nestedtensor.py.
1 parent bce13d4 commit efd3dad

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ void remove_padding_kernelLauncher(
146146
dim3 grid;
147147
grid.x = batch_size;
148148
grid.y = GRID_DIM_Y;
149-
at::cuda::CUDAStream stream = at::cuda::getDefaultCUDAStream();
149+
at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
150150
if (output_dim == 2) {
151151
remove_padding_2<T><<<grid, BLOCK_DIM, 0, stream>>>(
152152
input,
@@ -180,7 +180,7 @@ void remove_padding_transform0213_kernelLauncher(
180180
dim3 grid;
181181
grid.x = batch_size;
182182
grid.y = GRID_DIM_Y;
183-
at::cuda::CUDAStream stream = at::cuda::getDefaultCUDAStream();
183+
at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
184184
TORCH_CHECK(
185185
output_dim == 2,
186186
"remove padding transform0213 only support output dim == 2");
@@ -374,7 +374,7 @@ void add_padding_kernelLauncher(
374374
const std::vector<int64_t>& output_sizes,
375375
const int batch_size,
376376
const int output_batch_size) {
377-
at::cuda::CUDAStream stream = at::cuda::getDefaultCUDAStream();
377+
at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
378378
dim3 grid;
379379
grid.x = output_batch_size;
380380
grid.y = GRID_DIM_Y;

0 commit comments

Comments
 (0)