Skip to content

Replace use of queue mgr #1735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 20, 2024
23 changes: 13 additions & 10 deletions doc/dpctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,23 @@ Interplay with the Data Parallel Control Library
An example below demonstrates how the Data Parallel Extension for NumPy* can be
easily combined with the device management interface provided by dpctl package.

Specifically, the SYCL* queue manager interface from the dpctl package lets you
set an input queue as the currently usable queue within the scope of a context
manager. An array creation function from the dpnp package that is called inside
that scope will then allocate its data using that queue.

.. code-block:: python
:linenos:

import dpctl
import dpnp as np
import dpctl
import dpnp

# Create a small array on the selected CPU device and reduce it.
d = dpctl.select_cpu_device()
x = dpnp.array([1, 2, 3], device=d)
s = dpnp.sum(x)

# Sample the interval [0, pi] with device="gpu" and evaluate
# f(y) = 1 + y * sin(y) on the resulting array.
y = dpnp.linspace(0, dpnp.pi, num=10**6, device="gpu")
f = 1 + y * dpnp.sin(y)

# locate argument where function attains global maximum
# (f was computed from y, so the argmax index must be applied to y,
# not to the unrelated 3-element array x)
max_arg = y[dpnp.argmax(f)]
max_val = dpnp.max(f)

with dpctl.device_context("opencl:gpu"):
x = np.array([1, 2, 3])
s = np.sum(x)

For more information please refer to `Data Parallel Control Library`_
documentation.
Expand Down
2 changes: 0 additions & 2 deletions dpnp/backend/examples/example10.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ void test_dpnp_random_normal(const size_t size,
double dev_time_used = 0.0;
double sum_dev_time_used = 0.0;

dpnp_queue_initialize_c(QueueOptions::GPU_SELECTOR);

double *result = (double *)dpnp_memory_alloc_c(size * sizeof(double));

dpnp_rng_srand_c(seed); // TODO: will move
Expand Down
1 change: 0 additions & 1 deletion dpnp/backend/examples/example3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ int main(int, char **)
{
const size_t size = 256;

dpnp_queue_initialize_c();
std::cout << "SYCL queue is CPU: " << dpnp_queue_is_cpu_c() << std::endl;

int *array1 = (int *)dpnp_memory_alloc_c(size * sizeof(int));
Expand Down
2 changes: 0 additions & 2 deletions dpnp/backend/examples/example5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ int main(int, char **)
{
const size_t size = 256;

dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR);

double *result = (double *)dpnp_memory_alloc_c(size * sizeof(double));

size_t seed = 10;
Expand Down
2 changes: 0 additions & 2 deletions dpnp/backend/examples/example7.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ int main(int, char **)
const size_t size = 2;
size_t len = size * size;

dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR);

float *array = (float *)dpnp_memory_alloc_c(len * sizeof(float));
float *result1 = (float *)dpnp_memory_alloc_c(size * sizeof(float));
float *result2 = (float *)dpnp_memory_alloc_c(len * sizeof(float));
Expand Down
2 changes: 0 additions & 2 deletions dpnp/backend/examples/example8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ int main(int, char **)
{
const size_t size = 16;

dpnp_queue_initialize_c(QueueOptions::GPU_SELECTOR);

double *array = (double *)dpnp_memory_alloc_c(size * sizeof(double));
long *result = (long *)dpnp_memory_alloc_c(size * sizeof(long));

Expand Down
2 changes: 0 additions & 2 deletions dpnp/backend/examples/example9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ int main(int, char **)
long result = 0;
long result_verification = 0;

dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR);

long *array =
reinterpret_cast<long *>(dpnp_memory_alloc_c(size * sizeof(long)));

Expand Down
1 change: 0 additions & 1 deletion dpnp/backend/examples/example_bs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@ int main(int, char **)
const double RISK_FREE = 0.1;
const double VOLATILITY = 0.2;

dpnp_queue_initialize_c(QueueOptions::GPU_SELECTOR);
std::cout << "SYCL queue is CPU: " << dpnp_queue_is_cpu_c() << std::endl;

double *price = (double *)dpnp_memory_alloc_c(SIZE * sizeof(double));
Expand Down
30 changes: 1 addition & 29 deletions dpnp/backend/include/dpnp_iface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,33 +68,6 @@ typedef ssize_t shape_elem_type;
* @}
*/

/**
* @ingroup BACKEND_API
* @brief SYCL queue initialization selector.
*
* The enumeration lists the device-selection modes that can be passed to
* @ref dpnp_queue_initialize_c "dpnp_queue_initialize" when the library
* queue is initialized.
*/
enum class QueueOptions : uint32_t
{
CPU_SELECTOR, /**< CPU side execution mode */
GPU_SELECTOR, /**< GPU side execution mode */
AUTO_SELECTOR /**< Automatic selection based on the DPNPC_QUEUE_GPU
environment variable; @ref CPU_SELECTOR is the default
when the variable is unset or evaluates to zero */
};

/**
* @ingroup BACKEND_API
* @brief SYCL queue initialization.
*
* Global SYCL queue initialization.
*
* @param [in] selector Select type @ref QueueOptions of the SYCL queue.
* Default @ref AUTO_SELECTOR
*/
INP_DLLEXPORT void dpnp_queue_initialize_c(
QueueOptions selector = QueueOptions::AUTO_SELECTOR);

/**
* @ingroup BACKEND_API
* @brief SYCL queue device status.
Expand All @@ -112,8 +85,7 @@ INP_DLLEXPORT size_t dpnp_queue_is_cpu_c();
* @param [in] size_in_bytes Number of bytes for requested memory allocation.
* @param [in] q_ref Reference to SYCL queue.
*
* @return A pointer to newly created memory on @ref dpnp_queue_initialize_c
* "initialized SYCL device".
* @return A pointer to newly created memory on SYCL device.
*/
INP_DLLEXPORT char *dpnp_memory_alloc_c(DPCTLSyclQueueRef q_ref,
size_t size_in_bytes);
Expand Down
3 changes: 2 additions & 1 deletion dpnp/backend/kernels/dpnp_krnl_random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ static VSLStreamStatePtr get_rng_stream()

// Seeds the backend random-number state with `seed`.
//
// NOTE(review): this body contains both a call to the free function
// backend_sycl::backend_sycl_rng_engine_init and a call to
// set_rng_engines_seed on the object returned by backend_sycl::get().
// Presumably these are the old and new forms of the same step; confirm
// which one is intended before relying on this text.
void dpnp_rng_srand_c(size_t seed)
{
backend_sycl::backend_sycl_rng_engine_init(seed);
auto &be = backend_sycl::get();
be.set_rng_engines_seed(seed);
// Also forwards the seed to set_rng_stream (defined elsewhere in this file).
set_rng_stream(seed);
}

Expand Down
144 changes: 5 additions & 139 deletions dpnp/backend/src/queue_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,7 @@
#include "dpnp_utils.hpp"
#include "queue_sycl.hpp"

#if defined(DPNP_LOCAL_QUEUE)
sycl::queue *backend_sycl::queue = nullptr;
#endif
mkl_rng::mt19937 *backend_sycl::rng_engine = nullptr;
mkl_rng::mcg59 *backend_sycl::rng_mcg59_engine = nullptr;

static void dpnpc_show_mathlib_version()
[[maybe_unused]] static void dpnpc_show_mathlib_version()
{
#if 1
const int len = 256;
Expand All @@ -61,8 +55,8 @@ static void dpnpc_show_mathlib_version()
#endif
}

#if (not defined(NDEBUG)) && defined(DPNP_LOCAL_QUEUE)
static void show_available_sycl_devices()
#if (not defined(NDEBUG))
[[maybe_unused]] static void show_available_sycl_devices()
{
const std::vector<sycl::device> devices = sycl::device::get_devices();

Expand All @@ -86,25 +80,6 @@ static void show_available_sycl_devices()
}
#endif

#if defined(DPNP_LOCAL_QUEUE)
/**
 * Picks the device used when no explicit selector is requested.
 *
 * A CPU device is always constructed first. It is replaced by a GPU device
 * only when the DPNPC_QUEUE_GPU environment variable is set and parses
 * (via atoi) to a non-zero integer.
 */
static sycl::device get_default_sycl_device()
{
    sycl::device selected = sycl::device(sycl::cpu_selector());

    const char *gpu_env = getenv("DPNPC_QUEUE_GPU");
    const int gpu_requested = (gpu_env != NULL) ? atoi(gpu_env) : 0;

    if (gpu_requested != 0) {
        selected = sycl::device(sycl::gpu_selector());
    }

    return selected;
}
#endif

#if defined(DPNPC_TOUCH_KERNEL_TO_LINK)
/**
* Function push the SYCL kernels to be linked (final stage of the compilation)
Expand Down Expand Up @@ -135,117 +110,8 @@ static long dpnp_kernels_link()
}
#endif

#if defined(DPNP_LOCAL_QUEUE)
// Handler for asynchronous SYCL errors: every exception in the list is
// rethrown in turn, and those of type sycl::exception are reported on stdout.
static void exception_handler(sycl::exception_list exceptions)
{
    for (const std::exception_ptr &pending : exceptions) {
        try {
            std::rethrow_exception(pending);
        }
        catch (const sycl::exception &sycl_err) {
            std::cout << "DPNP. Caught asynchronous SYCL exception:\n"
                      << sycl_err.what() << std::endl;
        }
    }
}
#endif

// Initializes the backend for the requested device selector and prints a
// start-up report (device name, timings, math library version) to stdout.
//
// With DPNP_LOCAL_QUEUE defined, a library-owned sycl::queue is (re)created
// for the device chosen by `selector`. Without it, `selector` is ignored and
// only the report is produced for whatever DPNP_QUEUE refers to.
void backend_sycl::backend_sycl_queue_init(QueueOptions selector)
{
#if defined(DPNP_LOCAL_QUEUE)
// t1..t2 bracket the queue creation so its duration can be reported below.
std::chrono::high_resolution_clock::time_point t1 =
std::chrono::high_resolution_clock::now();

// Re-initialization: tear down the existing queue before creating a new one.
if (queue) {
backend_sycl::destroy();
}

sycl::device dev;

#if not defined(NDEBUG)
show_available_sycl_devices();
#endif

// Explicit CPU/GPU selectors are honored directly; any other value falls
// back to get_default_sycl_device().
if (QueueOptions::CPU_SELECTOR == selector) {
dev = sycl::device(sycl::cpu_selector());
}
else if (QueueOptions::GPU_SELECTOR == selector) {
dev = sycl::device(sycl::gpu_selector());
}
else {
dev = get_default_sycl_device();
}

// Verbose mode additionally enables queue profiling; both variants install
// exception_handler as the queue's asynchronous error handler.
if (is_verbose_mode()) {
sycl::property_list properties{
sycl::property::queue::enable_profiling()};
queue = new sycl::queue(dev, exception_handler, properties);
}
else {
queue = new sycl::queue(dev, exception_handler);
}

std::chrono::high_resolution_clock::time_point t2 =
std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_queue_init =
std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
#else
// No local queue is built in this configuration; silence the unused parameter.
(void)selector;
#endif

// t3..t4 bracket the optional kernel-link step; when
// DPNPC_TOUCH_KERNEL_TO_LINK is not defined nothing runs between them.
std::chrono::high_resolution_clock::time_point t3 =
std::chrono::high_resolution_clock::now();
#if defined(DPNPC_TOUCH_KERNEL_TO_LINK)
// Remove pre-link kernel library at startup time
dpnp_kernels_link();
#endif
std::chrono::high_resolution_clock::time_point t4 =
std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_kernels_link =
std::chrono::duration_cast<std::chrono::duration<double>>(t4 - t3);

// Start-up report.
std::cout << "Running on: "
<< DPNP_QUEUE.get_device().get_info<sycl::info::device::name>()
<< "\n";
#if defined(DPNP_LOCAL_QUEUE)
std::cout << "queue initialization time: " << time_queue_init.count()
<< " (sec.)\n";
#else
std::cout << "DPCtrl SYCL queue used\n";
#endif
std::cout << "SYCL kernels link time: " << time_kernels_link.count()
<< " (sec.)\n";
dpnpc_show_mathlib_version();

// Trailing blank line; std::endl also flushes the report.
std::cout << std::endl;
}

bool backend_sycl::backend_sycl_is_cpu()
{
sycl::queue &qptr = get_queue();

if (qptr.get_device().is_cpu()) {
return true;
}

return false;
}

// (Re)creates the MT19937 and MCG59 random engines on DPNP_QUEUE, both
// seeded with `seed`.
void backend_sycl::backend_sycl_rng_engine_init(size_t seed)
{
    // An engine from an earlier call is still registered: release it first.
    const bool already_initialized = (rng_engine != nullptr);
    if (already_initialized) {
        backend_sycl::destroy_rng_engine();
    }

    rng_engine = new mkl_rng::mt19937(DPNP_QUEUE, seed);
    rng_mcg59_engine = new mkl_rng::mcg59(DPNP_QUEUE, seed);
}

// Exported entry point for queue initialization: forwards `selector`
// unchanged to backend_sycl::backend_sycl_queue_init.
void dpnp_queue_initialize_c(QueueOptions selector)
{
backend_sycl::backend_sycl_queue_init(selector);
}

// Reports whether the backend's SYCL queue runs on a CPU device; the bool
// result of backend_sycl_is_cpu() is returned converted to size_t.
//
// NOTE(review): as written, the statements after the first `return` are
// unreachable. This looks like the old (static call) and new (call through
// backend_sycl::get()) forms of the same body side by side; confirm which
// one is intended.
size_t dpnp_queue_is_cpu_c()
{
return backend_sycl::backend_sycl_is_cpu();
const auto &be = backend_sycl::get();
return be.backend_sycl_is_cpu();
}
Loading