Skip to content

Commit c300e68

Browse files
committed
CUDA/HIP: add warp_size to cuda_device_info
1 parent 3d804de commit c300e68

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -520,6 +520,7 @@ struct ggml_cuda_device_info {
520 520   bool vmm; // virtual memory support
521 521   size_t vmm_granularity; // granularity of virtual memory
522 522   size_t total_vram;
    523 + int warp_size; // Number of threads in a dispatch
523 524   };
524 525
525 526   cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,6 +242,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
242 242
243 243   info.devices[id].nsm = prop.multiProcessorCount;
244 244   info.devices[id].smpb = prop.sharedMemPerBlock;
    245 + info.devices[id].warp_size = prop.warpSize;
245 246   #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
246 247   info.devices[id].smpbo = prop.sharedMemPerBlock;
247 248
@@ -256,8 +257,9 @@ static ggml_cuda_device_info ggml_cuda_init() {
256 257   info.devices[id].cc += prop.minor * 0x10;
257 258   }
258 259   }
259     - GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n",
260     - id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no");
    260 + GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
    261 + id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
    262 + device_vmm ? "yes" : "no", prop.warpSize);
261 263   #else
262 264   info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
263 265   info.devices[id].cc = 100*prop.major + 10*prop.minor;

0 commit comments

Comments
 (0)