Skip to content

Commit 183817a

Browse files
Merge remote-tracking branch 'origin/master' into elementwise-hyper-and-trig-funcs
2 parents 781898e + 73a2b68 commit 183817a

File tree

6 files changed

+60
-35
lines changed

6 files changed

+60
-35
lines changed

.github/workflows/conda-package.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818

1919
strategy:
2020
matrix:
21-
python: ['3.8', '3.9', '3.10']
21+
python: ['3.8', '3.9', '3.10', '3.11']
2222
steps:
2323
- uses: actions/checkout@v3
2424
with:
@@ -63,7 +63,7 @@ jobs:
6363

6464
strategy:
6565
matrix:
66-
python: ['3.8', '3.9', '3.10']
66+
python: ['3.8', '3.9', '3.10', '3.11']
6767
env:
6868
conda-bld: C:\Miniconda\conda-bld\win-64\
6969
steps:
@@ -102,7 +102,7 @@ jobs:
102102

103103
strategy:
104104
matrix:
105-
python: ['3.8', '3.9', '3.10']
105+
python: ['3.8', '3.9', '3.10', '3.11']
106106
experimental: [false]
107107
runner: [ubuntu-20.04]
108108
continue-on-error: ${{ matrix.experimental }}
@@ -185,7 +185,7 @@ jobs:
185185
shell: cmd /C CALL {0}
186186
strategy:
187187
matrix:
188-
python: ['3.8', '3.9', '3.10']
188+
python: ['3.8', '3.9', '3.10', '3.11']
189189
experimental: [false]
190190
runner: [windows-latest]
191191
continue-on-error: ${{ matrix.experimental }}
@@ -300,7 +300,7 @@ jobs:
300300
runs-on: ubuntu-20.04
301301
strategy:
302302
matrix:
303-
python: ['3.8', '3.9', '3.10']
303+
python: ['3.8', '3.9', '3.10', '3.11']
304304
steps:
305305
- name: Download artifact
306306
uses: actions/download-artifact@v3
@@ -324,7 +324,7 @@ jobs:
324324
runs-on: windows-latest
325325
strategy:
326326
matrix:
327-
python: ['3.8', '3.9', '3.10']
327+
python: ['3.8', '3.9', '3.10', '3.11']
328328
steps:
329329
- name: Download artifact
330330
uses: actions/download-artifact@v3

conda-recipe/build.sh

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
#!/bin/bash
22

3-
# Workaround to Klocwork overwriting LD_LIBRARY_PATH that was modified
4-
# by DPC++ compiler conda packages. Will need to be added to DPC++ compiler
5-
# activation scripts.
6-
export LDFLAGS="$LDFLAGS -Wl,-rpath,$PREFIX/lib"
7-
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PREFIX/compiler/lib/intel64_lin:$PREFIX/compiler/lib:$PREFIX/lib"
3+
# This is necessary to help DPC++ find Intel libraries such as SVML, IRNG, etc. in the build prefix
4+
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib"
85

96
# Intel LLVM must cooperate with compiler and sysroot from conda
107
echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg

dpctl/tensor/_reduction.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ def sum(arr, axis=None, dtype=None, keepdims=False):
123123

124124
res_usm_type = arr.usm_type
125125
if arr.size == 0:
126+
if keepdims:
127+
res_shape = res_shape + (1,) * red_nd
128+
inv_perm = sorted(range(nd), key=lambda d: perm[d])
129+
res_shape = tuple(res_shape[i] for i in inv_perm)
126130
return dpt.zeros(
127131
res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
128132
)

dpctl/tests/elementwise/test_floor_ceil_trunc.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def test_floor_ceil_trunc_usm_type(np_call, dpt_call, usm_type):
5757
q = get_queue_or_skip()
5858

5959
arg_dt = np.dtype("f4")
60-
input_shape = (2, 2, 2, 10)
60+
input_shape = (10, 10, 10, 10)
6161
X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q)
6262
X[..., 0::2] = -0.4
6363
X[..., 1::2] = 0.7
@@ -67,8 +67,7 @@ def test_floor_ceil_trunc_usm_type(np_call, dpt_call, usm_type):
6767
assert Y.sycl_queue == X.sycl_queue
6868
assert Y.flags.c_contiguous
6969

70-
expected_Y = np.empty(input_shape, dtype=arg_dt)
71-
expected_Y = np_call(np.float32(X))
70+
expected_Y = np_call(dpt.asnumpy(X))
7271
tol = 8 * dpt.finfo(Y.dtype).resolution
7372
assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
7473

@@ -125,12 +124,6 @@ def test_floor_ceil_trunc_errors(dpt_call):
125124
y,
126125
)
127126

128-
x = dpt.zeros(2)
129-
y = x
130-
assert_raises_regex(
131-
TypeError, "Input and output arrays have memory overlap", dpt_call, x, y
132-
)
133-
134127
x = dpt.zeros(2, dtype="float32")
135128
y = np.empty_like(x)
136129
assert_raises_regex(

dpctl/tests/test_sycl_kernel_submit.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -167,16 +167,21 @@ def test_async_submit():
167167
assert isinstance(kern2Kernel, dpctl_prog.SyclKernel)
168168

169169
status_complete = dpctl.event_status_type.complete
170-
n = 256 * 1024
171-
X = dpt.empty((3, n), dtype="u4", usm_type="device", sycl_queue=q)
170+
171+
# choose input size based on capability of the device
172+
f = q.sycl_device.max_work_group_size
173+
n = f * 1024
174+
n_alloc = 4 * n
175+
176+
X = dpt.empty((3, n_alloc), dtype="u4", usm_type="device", sycl_queue=q)
172177
first_row = dpctl_mem.as_usm_memory(X[0])
173178
second_row = dpctl_mem.as_usm_memory(X[1])
174179
third_row = dpctl_mem.as_usm_memory(X[2])
175180

176181
p1, p2 = 17, 27
177182

178183
async_detected = False
179-
for _ in range(5):
184+
for attempt in range(5):
180185
e1 = q.submit(
181186
kern1Kernel,
182187
[
@@ -209,19 +214,22 @@ def test_async_submit():
209214
e3_st = e3.execution_status
210215
e2_st = e2.execution_status
211216
e1_st = e1.execution_status
212-
if not all(
213-
[
214-
e == status_complete
215-
for e in (
216-
e1_st,
217-
e2_st,
218-
e3_st,
219-
)
220-
]
221-
):
217+
are_complete = [
218+
e == status_complete
219+
for e in (
220+
e1_st,
221+
e2_st,
222+
e3_st,
223+
)
224+
]
225+
e3.wait()
226+
if not all(are_complete):
222227
async_detected = True
223-
e3.wait()
224228
break
229+
else:
230+
n = n * (1 if attempt % 2 == 0 else 2)
231+
if n > n_alloc:
232+
break
225233

226234
assert async_detected, "No evidence of async submission detected, unlucky?"
227235
Xnp = dpt.asnumpy(X)
@@ -231,4 +239,4 @@ def test_async_submit():
231239
Xref[1, i] = (i * i * i) % p2
232240
Xref[2, i] = min(Xref[0, i], Xref[1, i])
233241

234-
assert np.array_equal(Xnp, Xref)
242+
assert np.array_equal(Xnp[:, :n], Xref[:, :n])

dpctl/tests/test_tensor_sum.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,26 @@ def test_sum_arg_out_dtype_scalar(arg_dtype, out_dtype):
133133
assert isinstance(r, dpt.usm_ndarray)
134134
assert r.dtype == dpt.dtype(out_dtype)
135135
assert dpt.asnumpy(r) == 1
136+
137+
138+
def test_sum_keepdims_zero_size():
139+
"""Check that sum with keepdims=True over a zero-size array keeps reduced axes as size 1; see gh-1293"""
140+
get_queue_or_skip()
141+
n = 10
142+
a = dpt.ones((n, 0, n))
143+
144+
s1 = dpt.sum(a, keepdims=True)
145+
assert s1.shape == (1, 1, 1)
146+
147+
s2 = dpt.sum(a, axis=(0, 1), keepdims=True)
148+
assert s2.shape == (1, 1, n)
149+
150+
s3 = dpt.sum(a, axis=(1, 2), keepdims=True)
151+
assert s3.shape == (n, 1, 1)
152+
153+
s4 = dpt.sum(a, axis=(0, 2), keepdims=True)
154+
assert s4.shape == (1, 0, 1)
155+
156+
a0 = a[0]
157+
s5 = dpt.sum(a0, keepdims=True)
158+
assert s5.shape == (1, 1)

0 commit comments

Comments
 (0)