|
55 | 55 |
|
56 | 56 | thread_local static int32_t defaultDevice = 0;
|
57 | 57 |
|
const char *kDebugEnvironmentVariable = "MLIR_CUDA_DEBUG";

/// Reports whether debug output is enabled, i.e. whether the environment
/// variable named by `kDebugEnvironmentVariable` is set (to any value,
/// including the empty string).
///
/// The environment is consulted only on the first call and the answer is
/// cached for the rest of the process: later changes to the environment are
/// not observed. The cache is a function-local static, so its initialization
/// runs exactly once even when several threads make the first call
/// concurrently.
bool isDebugEnabled() {
  static const bool isEnabled = getenv(kDebugEnvironmentVariable) != nullptr;
  return isEnabled;
}
| 68 | + |
/// Writes a printf-style message to stderr, prefixed with
/// "CudaRuntimeWrappers.cpp:<line>:<function>(): ", but only when
/// isDebugEnabled() returns true; otherwise it does nothing.
///
/// `fmt` must be a string literal, because it is concatenated with the prefix
/// format string. At least one argument must follow `fmt`: `__VA_ARGS__` is
/// expanded directly after a comma, so an empty argument list does not
/// compile.
#define debug_print(fmt, ...)                                                  \
  do {                                                                         \
    if (!isDebugEnabled())                                                     \
      break;                                                                   \
    fprintf(stderr, "%s:%d:%s(): " fmt, "CudaRuntimeWrappers.cpp", __LINE__,   \
            __func__, __VA_ARGS__);                                            \
  } while (0)
| 75 | + |
// Make the primary context of the current default device current for the
// duration of the instance and restore the previous context on destruction.
|
@@ -273,6 +291,24 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuTensorMapEncodeTiled(
|
273 | 291 | tensorMap, tensorDataType, tensorRank, globalAddress, globalDim,
|
274 | 292 | globalStrides, boxDim, elementStrides, interleave, swizzle, l2Promotion,
|
275 | 293 | oobFill));
|
| 294 | + debug_print("Created TMA descriptor\n Addr: %p\n" |
| 295 | + "data type : %d\n" |
| 296 | + "rank : %d\n" |
| 297 | + "globalDim[5]: %zu, %zu, %zu, %zu, %zu\n" |
| 298 | + "globalStrides[5]: %zu, %zu, %zu, %zu, %zu\n" |
| 299 | + "boxDim[5]: %u, %u, %u, %u, %u\n" |
| 300 | + "elementStrides[5]: %u, %u, %u, %u, %u\n" |
| 301 | + "interleave: %u \n" |
| 302 | + "swizzle: %u \n" |
| 303 | + "l2Promotion: %u \n" |
| 304 | + "oobFill: %u \n", |
| 305 | + (void *)&tensorMap, tensorDataType, tensorRank, globalDim[0], |
| 306 | + globalDim[1], globalDim[2], globalDim[3], globalDim[4], |
| 307 | + globalStrides[0], globalStrides[1], globalStrides[2], |
| 308 | + globalStrides[3], globalStrides[4], boxDim[0], boxDim[1], |
| 309 | + boxDim[2], boxDim[3], boxDim[4], elementStrides[0], |
| 310 | + elementStrides[1], elementStrides[2], elementStrides[3], |
| 311 | + elementStrides[4], interleave, swizzle, l2Promotion, oobFill); |
276 | 312 | }
|
277 | 313 |
|
278 | 314 | extern "C" MLIR_CUDA_WRAPPERS_EXPORT void *mgpuTensorMapEncodeTiledMemref(
|
|
0 commit comments