Skip to content

Commit 926e315

Browse files
Merge pull request #390 from IntelPython/improve-examples
Improve examples
2 parents 7767b4f + 8e44952 commit 926e315

File tree

14 files changed

+513
-115
lines changed

14 files changed

+513
-115
lines changed

examples/cython/sycl_buffer/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ CC=clang CXX=dpcpp python setup.py build_ext --inplace
2121
#2 Running
2222

2323
```
24-
# SYCL_BE=PI_OPENCL sets SYCL backend to OpenCL to avoid a
24+
# SYCL_DEVICE_FILTER=opencl sets SYCL backend to OpenCL to avoid a
2525
# transient issue with MKL's using the default Level-0 backend
26-
(idp) [08:16:12 ansatnuc04 simple]$ SYCL_BE=PI_OPENCL ipython
26+
(idp) [08:16:12 ansatnuc04 simple]$ SYCL_DEVICE_FILTER=opencl ipython
2727
Python 3.7.7 (default, Jul 14 2020, 22:02:37)
2828
Type 'copyright', 'credits' or 'license' for more information
2929
IPython 7.17.0 -- An enhanced Interactive Python. Type '?' for help.
@@ -67,7 +67,7 @@ Times for NumPy
6767
Running run.py:
6868

6969
```
70-
(idp) [09:14:53 ansatnuc04 sycl_buffer]$ SYCL_BE=PI_OPENCL python run.py
70+
(idp) [09:14:53 ansatnuc04 sycl_buffer]$ SYCL_DEVICE_FILTER=opencl python run.py
7171
Result computed by NumPy
7272
[ 0.27170187 -23.36798583 7.31326489 -1.95121928]
7373
Result computed by SYCL extension

examples/cython/sycl_buffer/_buffer_example.pyx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,19 @@ cdef extern from "use_sycl_buffer.h":
2424
int c_columnwise_total(c_dpctl.DPCTLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil
2525
int c_columnwise_total_no_mkl(c_dpctl.DPCTLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil
2626

27-
def columnwise_total(double[:, ::1] v, method='mkl'):
27+
def columnwise_total(double[:, ::1] v, method='mkl', queue=None):
2828
cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d')
2929
cdef double[::1] res_memslice = res_array
3030
cdef int ret_status
3131
cdef c_dpctl.SyclQueue q
3232
cdef c_dpctl.DPCTLSyclQueueRef q_ref
3333

34-
q = c_dpctl.get_current_queue()
34+
if (queue is None):
35+
q = c_dpctl.SyclQueue()
36+
elif isinstance(queue, dpctl.SyclQueue):
37+
q = <c_dpctl.SyclQueue> queue
38+
else:
39+
q = c_dpctl.SyclQueue(queue)
3540
q_ref = q.get_queue_ref()
3641

3742
if method == 'mkl':

examples/cython/sycl_buffer/bench.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,19 @@
2424
print("=" * 10 + " Executing warm-up " + "=" * 10)
2525
print("NumPy result: ", X.sum(axis=0))
2626

27-
dpctl.set_global_queue("opencl:cpu")
27+
q = dpctl.SyclQueue("opencl:cpu")
2828
print(
2929
"SYCL({}) result: {}".format(
30-
dpctl.get_current_queue().sycl_device.name,
31-
sb.columnwise_total(X),
30+
q.sycl_device.name,
31+
sb.columnwise_total(X, queue=q),
3232
)
3333
)
3434

35-
dpctl.set_default_queue("opencl:gpu")
35+
q = dpctl.SyclQueue("opencl:gpu")
3636
print(
3737
"SYCL({}) result: {}".format(
38-
dpctl.get_current_queue().sycl_device.name,
39-
sb.columnwise_total(X),
38+
q.sycl_device.name,
39+
sb.columnwise_total(X, queue=q),
4040
)
4141
)
4242

@@ -45,9 +45,9 @@
4545
print("Times for 'opencl:cpu'")
4646
print(
4747
timeit.repeat(
48-
stmt="sb.columnwise_total(X)",
49-
setup='dpctl.set_global_queue("opencl:cpu"); '
50-
"sb.columnwise_total(X)", # ensure JIT compilation is not counted
48+
stmt="sb.columnwise_total(X, queue=q)",
49+
setup='q = dpctl.SyclQueue("opencl:cpu"); '
50+
"sb.columnwise_total(X, queue=q)", # ensure JIT compilation is not counted
5151
number=100,
5252
globals=globals(),
5353
)
@@ -56,8 +56,8 @@
5656
print("Times for 'opencl:gpu'")
5757
print(
5858
timeit.repeat(
59-
stmt="sb.columnwise_total(X)",
60-
setup='dpctl.set_default_queue("opencl:gpu"); sb.columnwise_total(X)',
59+
stmt="sb.columnwise_total(X, queue=q)",
60+
setup='q = dpctl.SyclQueue("opencl:gpu"); sb.columnwise_total(X, queue=q)',
6161
number=100,
6262
globals=globals(),
6363
)

examples/cython/sycl_buffer/run.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,24 @@
1616

1717
import syclbuffer as sb
1818
import numpy as np
19+
import dpctl
1920

2021
X = np.random.randn(100, 4)
2122

2223
print("Result computed by NumPy")
2324
print(X.sum(axis=0))
24-
print("Result computed by SYCL extension")
25+
print("Result computed by SYCL extension using default offloading target")
2526
print(sb.columnwise_total(X))
2627

2728

2829
print("")
30+
2931
# controlling where to offload
30-
import dpctl
3132

32-
with dpctl.device_context("opencl:gpu"):
33-
print("Running on: ", dpctl.get_current_queue().sycl_device.name)
34-
print(sb.columnwise_total(X))
33+
q = dpctl.SyclQueue("opencl:gpu")
34+
print("Running on: ", q.sycl_device.name)
35+
print(sb.columnwise_total(X, queue=q))
3536

36-
with dpctl.device_context("opencl:cpu"):
37-
print("Running on: ", dpctl.get_current_queue().sycl_device.name)
38-
print(sb.columnwise_total(X))
37+
q = dpctl.SyclQueue("opencl:cpu")
38+
print("Running on: ", q.sycl_device.name)
39+
print(sb.columnwise_total(X, queue=q))

examples/cython/sycl_direct_linkage/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ To illustrate the queue creation overhead in each call, compare execution of def
2626
which is Intel Gen9 GPU on OpenCL backend:
2727

2828
```
29-
(idp) [11:24:38 ansatnuc04 sycl_direct_linkage]$ SYCL_BE=PI_OPENCL python bench.py
29+
(idp) [11:24:38 ansatnuc04 sycl_direct_linkage]$ SYCL_DEVICE_FILTER=opencl:gpu python bench.py
3030
========== Executing warm-up ==========
3131
NumPy result: [1. 1. 1. ... 1. 1. 1.]
3232
SYCL(default_device) result: [1. 1. 1. ... 1. 1. 1.]
@@ -37,7 +37,7 @@ Times for NumPy
3737
[3.5394036192446947, 3.498957809060812, 3.4925728561356664, 3.5036555202677846, 3.493739523924887]
3838
```
3939

40-
vs. timing when `dpctl`'s current queue is being reused:
40+
vs. timing when `dpctl`'s queue is being reused:
4141

4242
```
4343
(idp) [11:29:14 ansatnuc04 sycl_buffer]$ python bench.py

examples/cython/usm_memory/blackscholes.pyx

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,18 @@ cdef extern from "sycl_blackscholes.hpp":
2929
cdef void cpp_blackscholes[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T* callput) except +
3030
cdef void cpp_populate_params[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) except +
3131

32-
def black_scholes_price(floating[:, ::1] option_params):
32+
cdef c_dpctl.SyclQueue from_queue_keyword(queue):
    """Normalize the ``queue`` keyword argument into a ``c_dpctl.SyclQueue``.

    - ``None``: a default-constructed ``c_dpctl.SyclQueue()`` is returned.
    - an instance of ``dpctl.SyclQueue``: the same object is returned,
      cast to the Cython extension type.
    - anything else: the value is forwarded to the ``c_dpctl.SyclQueue``
      constructor, which is responsible for interpreting or rejecting it.
    """
    if queue is None:
        return c_dpctl.SyclQueue()
    if isinstance(queue, dpctl.SyclQueue):
        return <c_dpctl.SyclQueue> queue
    # Every case is handled above or here, so no trailing fallback is needed.
    return c_dpctl.SyclQueue(queue)
41+
42+
43+
def black_scholes_price(floating[:, ::1] option_params, queue=None):
3344
cdef size_t n_opts = option_params.shape[0]
3445
cdef size_t n_params = option_params.shape[1]
3546
cdef size_t n_bytes = 0
@@ -49,19 +60,19 @@ def black_scholes_price(floating[:, ::1] option_params):
4960
"Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)."
5061
).format(n_params))
5162

52-
q = c_dpctl.get_current_queue()
63+
q = from_queue_keyword(queue)
5364
q_ptr = q.get_queue_ref()
5465
if (floating is double):
5566
n_bytes = 2*n_opts * sizeof(double)
56-
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes)
67+
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes, queue=q)
5768
callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='d')
5869
call_put_prices = callput_arr
5970
dp1 = &option_params[0,0]
6071
dp2 = &call_put_prices[0,0];
6172
cpp_blackscholes[double](q_ptr, n_opts, dp1, dp2)
6273
elif (floating is float):
6374
n_bytes = 2*n_opts * sizeof(float)
64-
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes)
75+
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes, queue=q)
6576
callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='f')
6677
call_put_prices = callput_arr
6778
fp1 = &option_params[0,0]
@@ -70,7 +81,7 @@ def black_scholes_price(floating[:, ::1] option_params):
7081

7182
return callput_arr
7283

73-
def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, int seed):
84+
def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, int seed, queue=None):
7485
cdef size_t n_opts = option_params.shape[0]
7586
cdef size_t n_params = option_params.shape[1]
7687

@@ -85,7 +96,7 @@ def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl,
8596
"Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)."
8697
).format(n_params))
8798

88-
q = c_dpctl.get_current_queue()
99+
q = from_queue_keyword(queue)
89100
q_ptr = q.get_queue_ref()
90101
if (floating is double):
91102
dp = &option_params[0,0]

examples/cython/usm_memory/run.py

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@
2121
from reference_black_scholes import ref_python_black_scholes
2222

2323

24-
def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype):
25-
usm_mem = dpctl_mem.MemoryUSMShared(n_opts * 5 * np.dtype(dtype).itemsize)
26-
# usm_mem2 = dpctl_mem.MemoryUSMDevice(n_opts * 5 * np.dtype(dtype).itemsize)
24+
def gen_option_params(
25+
n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype, queue=None
26+
):
27+
nbytes = n_opts * 5 * np.dtype(dtype).itemsize
28+
usm_mem = dpctl_mem.MemoryUSMShared(nbytes, queue=queue)
2729
params = np.ndarray(shape=(n_opts, 5), buffer=usm_mem, dtype=dtype)
2830
seed = 1234
29-
bs.populate_params(params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed)
31+
bs.populate_params(
32+
params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed, queue=queue
33+
)
3034
return params
3135

3236

@@ -47,38 +51,44 @@ def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype):
4751
# compute prices in CPython
4852
X_ref = np.array([ref_python_black_scholes(*opt) for opt in opts], dtype="d")
4953

50-
print(np.allclose(Xgpu, X_ref, atol=1e-5))
54+
print(
55+
"Correctness check: allclose(Xgpu, Xref) == ", np.allclose(Xgpu, X_ref, atol=1e-5)
56+
)
5157

5258
n_opts = 3 * 10 ** 6
5359

5460
# compute on CPU sycl device
5561
import timeit
5662

57-
for _ in range(3):
63+
cpu_q = dpctl.SyclQueue("opencl:cpu:0")
64+
opts1 = gen_option_params(
65+
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d", queue=cpu_q
66+
)
67+
68+
gpu_q = dpctl.SyclQueue("level_zero:gpu:0")
69+
opts2 = gen_option_params(
70+
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d", queue=gpu_q
71+
)
5872

59-
dpctl.set_global_queue("opencl:cpu:0")
60-
print("Using : {}".format(dpctl.get_current_queue().sycl_device.name))
73+
cpu_times = []
74+
gpu_times = []
75+
for _ in range(5):
6176

6277
t0 = timeit.default_timer()
63-
opts1 = gen_option_params(
64-
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d"
65-
)
66-
X1 = bs.black_scholes_price(opts1)
78+
X1 = bs.black_scholes_price(opts1, queue=cpu_q)
6779
t1 = timeit.default_timer()
6880

69-
print("Elapsed: {}".format(t1 - t0))
81+
cpu_times.append(t1 - t0)
7082

7183
# compute on GPU sycl device
72-
dpctl.set_global_queue("level_zero:gpu:0")
73-
print("Using : {}".format(dpctl.get_current_queue().sycl_device.name))
7484

7585
t0 = timeit.default_timer()
76-
opts2 = gen_option_params(
77-
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d"
78-
)
79-
X2 = bs.black_scholes_price(opts2)
86+
X2 = bs.black_scholes_price(opts2, queue=gpu_q)
8087
t1 = timeit.default_timer()
81-
print("Elapsed: {}".format(t1 - t0))
88+
gpu_times.append(t1 - t0)
89+
90+
print("Using : {}".format(cpu_q.sycl_device.name))
91+
print("Wall times : {}".format(cpu_times))
8292

83-
print(np.abs(opts1 - opts2).max())
84-
print(np.abs(X2 - X1).max())
93+
print("Using : {}".format(gpu_q.sycl_device.name))
94+
print("Wall times : {}".format(gpu_times))

examples/python/_runner.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Data Parallel Control (dpctl)
2+
#
3+
# Copyright 2020-2021 Intel Corporation
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import argparse
18+
import inspect
19+
20+
21+
def has_nondefault_params(sgn):
    """Tell whether a callable with signature ``sgn`` needs explicit arguments.

    Args:
        sgn: An ``inspect.Signature`` object to examine.

    Returns:
        bool: ``True`` if at least one parameter has no default value and is
        not variadic, i.e. the callable cannot be invoked as ``f()``;
        ``False`` otherwise.
    """
    # ``*args`` and ``**kwargs`` never carry a default, yet they do not have
    # to be supplied by the caller, so they must not count as required.
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    return any(
        param.default is inspect.Parameter.empty
        and param.kind not in variadic_kinds
        for param in sgn.parameters.values()
    )
26+
27+
28+
def run_examples(example_description, glbls_dict):
    """Command-line driver that lists or runs example functions.

    Parses ``sys.argv`` with ``argparse`` and then either prints the names of
    the functions found in ``glbls_dict`` or calls the requested ones.

    Args:
        example_description: Text used as the ``argparse`` description.
        glbls_dict: Mapping of names to objects (typically ``globals()`` of
            the example script). Entries for which ``inspect.isfunction`` is
            true are treated as runnable examples.

    Raises:
        SystemExit: With code 0 after listing examples, which happens when
            ``--list`` is given or ``--run`` is absent or empty.
        ValueError: If ``--run`` resolves to an empty list of names.

    Requested names that are missing from ``glbls_dict`` or that are not
    callable are skipped; so are callables that require arguments. Unless
    ``--quiet`` is given, an ``INFO:`` line is printed for each of them.
    """
    parser = argparse.ArgumentParser(
        description=example_description,
    )
    parser.add_argument(
        "-r",
        "--run",
        type=str,
        help="Functions to execute. Use --run all to run all of them.",
    )
    parser.add_argument(
        "-l", "--list", action="store_true", help="List available function names to run"
    )
    parser.add_argument(
        "-q", "--quiet", action="store_true", help="Do not echo example name."
    )
    args = parser.parse_args()

    # Collected once; used by both the listing and the ``--run all`` paths.
    available = [
        name for name in glbls_dict if inspect.isfunction(glbls_dict.get(name))
    ]

    if args.list or not args.run:
        if available:
            print("Available examples:")
            print(", ".join(available))
        else:
            print("No examples are available.")
        # ``exit`` is injected by the ``site`` module and may be absent
        # (e.g. ``python -S``); raising SystemExit has the same effect.
        raise SystemExit(0)

    requested = available if args.run == "all" else args.run.split()
    if not requested:
        raise ValueError("No function to run was specified")

    for fn in requested:
        if fn not in glbls_dict:
            if not args.quiet:
                print(f"INFO: Skip {fn} as no example with this name was found")
            continue
        clbl = glbls_dict.get(fn)
        if not callable(clbl):
            # inspect.signature would raise TypeError on a non-callable.
            if not args.quiet:
                print(f"INFO: Skip {fn} as it is not callable")
            continue
        sgn = inspect.signature(clbl)
        print("")
        if has_nondefault_params(sgn):
            if not args.quiet:
                print(f"INFO: Skip execution of {fn} as it requires arguments")
        else:
            if not args.quiet:
                print(f"INFO: Executing example {fn}")
            clbl()
            if not args.quiet:
                print("INFO: ===========================")

0 commit comments

Comments
 (0)