|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 | 8 |
|
9 | 9 | #include "flang/Runtime/CUDA/kernel.h"
|
| 10 | +#include "flang-rt/runtime/descriptor.h" |
10 | 11 | #include "flang-rt/runtime/terminator.h"
|
11 | 12 | #include "flang/Runtime/CUDA/common.h"
|
12 | 13 |
|
@@ -74,9 +75,9 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
|
74 | 75 | Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
|
75 | 76 | terminator.Crash("Too many invalid grid dimensions");
|
76 | 77 | }
|
77 |
| - cudaStream_t cuStream = 0; // TODO stream managment |
78 |
| - CUDA_REPORT_IF_ERROR( |
79 |
| - cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, cuStream)); |
| 78 | + cudaStream_t defaultStream = 0; |
| 79 | + CUDA_REPORT_IF_ERROR(cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, |
| 80 | + stream != kNoAsyncId ? (cudaStream_t)stream : defaultStream)); |
80 | 81 | }
|
81 | 82 |
|
82 | 83 | void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
|
@@ -140,7 +141,11 @@ void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
|
140 | 141 | terminator.Crash("Too many invalid grid dimensions");
|
141 | 142 | }
|
142 | 143 | config.dynamicSmemBytes = smem;
|
143 |
| - config.stream = 0; // TODO stream managment |
| 144 | + if (stream != kNoAsyncId) { |
| 145 | + config.stream = (cudaStream_t)stream; |
| 146 | + } else { |
| 147 | + config.stream = 0; |
| 148 | + } |
144 | 149 | cudaLaunchAttribute launchAttr[1];
|
145 | 150 | launchAttr[0].id = cudaLaunchAttributeClusterDimension;
|
146 | 151 | launchAttr[0].val.clusterDim.x = clusterX;
|
@@ -212,9 +217,10 @@ void RTDEF(CUFLaunchCooperativeKernel)(const void *kernel, intptr_t gridX,
|
212 | 217 | Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
|
213 | 218 | terminator.Crash("Too many invalid grid dimensions");
|
214 | 219 | }
|
215 |
| - cudaStream_t cuStream = 0; // TODO stream managment |
216 |
| - CUDA_REPORT_IF_ERROR(cudaLaunchCooperativeKernel( |
217 |
| - kernel, gridDim, blockDim, params, smem, cuStream)); |
| 220 | + cudaStream_t defaultStream = 0; |
| 221 | + CUDA_REPORT_IF_ERROR( |
| 222 | + cudaLaunchCooperativeKernel(kernel, gridDim, blockDim, params, smem, |
| 223 | + stream != kNoAsyncId ? (cudaStream_t)stream : defaultStream)); |
218 | 224 | }
|
219 | 225 |
|
220 | 226 | } // extern "C"
|
0 commit comments