Skip to content

Commit 05b5899

Browse files
authored
Merge pull request #1181 from DuncanMcBain/cuda-mem-size-fix
Return device total global memory for MaxAllocSize
2 parents ecdd159 + c02d137 commit 05b5899

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

source/adapters/cuda/device.hpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,11 @@ struct ur_device_handle_t_ {
68 68
}
69 69

70 70
// Max size of memory object allocation in bytes.
71-
// The minimum value is max(min(1024 × 1024 ×
72-
// 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE),
73-
// 32 × 1024 × 1024) for devices that are not of type
74-
// CL_DEVICE_TYPE_CUSTOM.
75-
size_t Global = 0;
76-
UR_CHECK_ERROR(cuDeviceTotalMem(&Global, cuDevice));
77-
78-
auto QuarterGlobal = static_cast<uint32_t>(Global / 4u);
79-
80-
MaxAllocSize = std::max(std::min(1024u * 1024u * 1024u, QuarterGlobal),
81-
32u * 1024u * 1024u);
71+
// The minimum value is max (1/4th of info::device::global_mem_size,
72+
// 128*1024*1024) if this SYCL device is not device_type::custom.
73+
// CUDA doesn't really have this concept, and could allow almost 100% of
74+
// global memory in one allocation, but is dependent on device usage.
75+
UR_CHECK_ERROR(cuDeviceTotalMem(&MaxAllocSize, cuDevice));
82 76
}
83 77

84 78
// Destructor: calls cuDevicePrimaryCtxRelease on CuDevice. The call's return
// value is not checked here.
~ur_device_handle_t_() { cuDevicePrimaryCtxRelease(CuDevice); }

0 commit comments

Comments
 (0)