Skip to content

Commit a6d03f3

Browse files
[SYCL][CUDA][LIT] Fix base address get and subbuffer LIT test (#1913)
This commit fixes a hardcoded value for the base address alignment in the CUDA backend. It also fixes a hardcoded value in the subbuffer LIT test that should be dependent on the base address alignment; the test now passes on CUDA. Signed-off-by: Przemek Malon <[email protected]>
1 parent 010f112 commit a6d03f3

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -951,12 +951,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
951951
size_t{4000u});
952952
}
953953
case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: {
954-
// TODO: is this config consistent across all NVIDIA GPUs?
955-
// "The minimum value is the size (in bits) of the largest OpenCL built-in
956-
// data type supported by the device"
957-
// Hard coded to value returned by clinfo for OpenCL 1.2 CUDA | GeForce GTX
958-
// 1060 3GB
959-
return getInfo(param_value_size, param_value, param_value_size_ret, 4096u);
954+
int mem_base_addr_align = 0;
955+
cl::sycl::detail::pi::assertion(
956+
cuDeviceGetAttribute(&mem_base_addr_align,
957+
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
958+
device->get()) == CUDA_SUCCESS);
959+
// Multiply by 8 as clGetDeviceInfo returns this value in bits
960+
mem_base_addr_align *= 8;
961+
return getInfo(param_value_size, param_value, param_value_size_ret,
962+
mem_base_addr_align);
960963
}
961964
case PI_DEVICE_INFO_HALF_FP_CONFIG: {
962965
// TODO: is this config consistent across all NVIDIA GPUs?

sycl/test/basic_tests/buffer/subbuffer.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
// XFAIL: cuda
2-
// TODO: Fix CUDA implementation.
3-
//
41
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
52
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
63
// RUN: %CPU_RUN_PLACEHOLDER %t.out
@@ -112,12 +109,12 @@ void check1DSubBuffer(cl::sycl::queue &q) {
112109
assert(false && "Exception was caught");
113110
}
114111

115-
for (int i = offset; i < subbuf_size; ++i)
116-
assert(vec[i] == (i > 34 ? i * 10 : i * -10) &&
112+
for (int i = offset; i < offset + subbuf_size; ++i)
113+
assert(vec[i] == (i < offset + offset_inside_subbuf ? i * 10 : i * -10) &&
117114
"Invalid result in 1d sub buffer");
118115

119116
for (int i = 0; i < subbuf_size; ++i)
120-
assert(vec2[i] == (i < 3 ? (32 + i) : (32 + i) * -1) &&
117+
assert(vec2[i] == (i < 3 ? (offset + i) : (offset + i) * -1) &&
121118
"Invalid result in 1d sub buffer");
122119
}
123120

0 commit comments

Comments
 (0)