
Commit 33c241c

Merge remote-tracking branch 'origin/master' into gold/2021
2 parents 22cc517 + f52182d commit 33c241c

22 files changed (+354, -88 lines)

.github/workflows/generate-coverage.yaml

Lines changed: 1 addition & 1 deletion
@@ -79,7 +79,7 @@ jobs:
       - name: Install dpctl dependencies
         shell: bash -l {0}
         run: |
-          pip install numpy cython setuptools pytest pytest-cov scikit-build cmake coverage[toml]
+          pip install numpy cython"<3" setuptools pytest pytest-cov scikit-build cmake coverage[toml]

       - name: Build dpctl with coverage
         shell: bash -l {0}

.github/workflows/generate-docs.yml

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ jobs:
       - name: Install Intel OneAPI
         if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
         run: |
-          sudo apt-get install intel-oneapi-dpcpp-cpp-compiler
+          sudo apt-get install intel-oneapi-compiler-dpcpp-cpp
       - name: Install Lua
         if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
         run: |
@@ -49,7 +49,7 @@ jobs:
         if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
         shell: bash -l {0}
         run: |
-          pip install numpy cython setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
+          pip install numpy cython"<3" setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
       - name: Checkout repo
         uses: actions/checkout@v3
         with:

.github/workflows/os-llvm-sycl-build.yml

Lines changed: 32 additions & 24 deletions
@@ -11,9 +11,9 @@ jobs:

     env:
       DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download
-      DRIVER_PATH: 2023-WW13
-      OCLCPUEXP_FN: oclcpuexp-2023.15.3.0.20_rel.tar.gz
-      FPGAEMU_FN: fpgaemu-2023.15.3.0.20_rel.tar.gz
+      DRIVER_PATH: 2023-WW27
+      OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz
+      FPGAEMU_FN: fpgaemu-2023.16.6.0.28_rel.tar.gz
       TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/
       TBB_INSTALL_DIR: oneapi-tbb-2021.9.0
       TBB_FN: oneapi-tbb-2021.9.0-lin.tgz
@@ -37,34 +37,42 @@ jobs:
             ${{ runner.os }}-

       - name: Download and install nightly and components
+        env:
+          USE_LATEST_SYCLOS: 0
         shell: bash -l {0}
         run: |
           cd /home/runner/work
           mkdir -p sycl_bundle
           cd sycl_bundle
-          # get list of shas and tags from remote, filter sycl-nightly tags and reverse order
-          export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
-              grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
-          # initialize
-          unset DEPLOY_NIGHTLY_TAG
-          unset DEPLOY_NIGHTLY_TAG_SHA
-
-          # go through tags and find the most recent one where nighly build binary is available
-          while IFS= read -r NEXT_LLVM_TAG; do
-              export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
-              export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
-                  $(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
-              if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
-              then
-                  export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
-                  export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
-                  break
-              fi
-          done <<< "${LLVM_TAGS}"
+          if [[ "${USE_LATEST_SYCLOS:-0}" -eq "1" ]]; then
+              # get list of shas and tags from remote, filter sycl-nightly tags and reverse order
+              export LLVM_TAGS=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | \
+                  grep sycl-nightly | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }')
+              # initialize
+              unset DEPLOY_NIGHTLY_TAG
+              unset DEPLOY_NIGHTLY_TAG_SHA
+
+              # go through tags and find the most recent one where nighly build binary is available
+              while IFS= read -r NEXT_LLVM_TAG; do
+                  export NEXT_LLVM_TAG_SHA=$(echo ${NEXT_LLVM_TAG} | awk '{print $1}')
+                  export NEXT_NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \
+                      $(echo ${NEXT_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}'))
+                  if [[ `wget -S --spider ${DOWNLOAD_URL_PREFIX}/${NEXT_NIGHTLY_TAG}/dpcpp-compiler.tar.gz 2>&1 | grep 'HTTP/1.1 200 OK'` ]];
+                  then
+                      export DEPLOY_NIGHTLY_TAG=${NEXT_NIGHTLY_TAG}
+                      export DEPLOY_LLVM_TAG_SHA=${NEXT_LLVM_TAG_SHA}
+                      break
+                  fi
+              done <<< "${LLVM_TAGS}"
+          else
+              # Use latest known to work tag instead
+              export DEPLOY_NIGHTLY_TAG="sycl-nightly%2F20230606"
+              export DEPLOY_LLVM_TAG_SHA=f44d0133d4b0077298f034697a1f3818ff1d6134
+          fi

           [[ -n "${DEPLOY_NIGHTLY_TAG}" ]] || exit 1
           [[ -n "${DEPLOY_LLVM_TAG_SHA}" ]] || exit 1
-          echo "Using ${m} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"
+          echo "Using ${DEPLOY_NIGHTLY_TAG} corresponding to intel/llvm at ${DEPLOY_LLVM_TAG_SHA}"

           if [[ -f bundle_id.txt && ( "$(cat bundle_id.txt)" == "${DEPLOY_LLVM_TAG_SHA}" ) ]]; then
               echo "Using cached download of ${DEPLOY_LLVM_TAG_SHA}"
@@ -100,7 +108,7 @@ jobs:
       - name: Install dpctl dependencies
         shell: bash -l {0}
         run: |
-          pip install numpy cython setuptools pytest scikit-build cmake
+          pip install numpy cython"<3" setuptools pytest scikit-build cmake

       - name: Checkout repo
         uses: actions/checkout@v3
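
Note on the pinned fallback tag: intel/llvm nightly tags contain a slash ("sycl-nightly/20230606"), and the workflow URL-encodes the tag name with urllib.parse.quote_plus before composing the release download URL, which is why the hard-coded value reads "sycl-nightly%2F20230606". A minimal sketch of that encoding step:

import urllib.parse as ul

tag = "sycl-nightly/20230606"
print(ul.quote_plus(tag))  # -> sycl-nightly%2F20230606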

conda-recipe/meta.yaml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ requirements:
     - cmake >=3.21
     - ninja
     - git
-    - cython
+    - cython <3
    - python
    - scikit-build
    - numpy

dpctl/tensor/_copy_utils.py

Lines changed: 5 additions & 0 deletions
@@ -213,6 +213,11 @@ def _copy_same_shape(dst, src):
     """Assumes src and dst have the same shape."""
     # check that memory regions do not overlap
     if ti._array_overlap(dst, src):
+        if src._pointer == dst._pointer and (
+            src is dst
+            or (src.strides == dst.strides and src.dtype == dst.dtype)
+        ):
+            return
         _copy_overlapping(src=src, dst=dst)
         return
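
A minimal usage sketch of the early return added above, assuming in-place assignment on a usm_ndarray routes through _copy_same_shape (the array values are illustrative):

import dpctl.tensor as dpt

x = dpt.arange(10, dtype="i4")
x[...] = x        # dst and src are the same logical tensor: returns early
x[...] = x[::1]   # a view with the same pointer, strides and dtype: also a no-op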

dpctl/tensor/_elementwise_common.py

Lines changed: 35 additions & 22 deletions
@@ -52,6 +52,15 @@ def __call__(self, x, out=None, order="K"):
         if not isinstance(x, dpt.usm_ndarray):
             raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")

+        if order not in ["C", "F", "K", "A"]:
+            order = "K"
+        buf_dt, res_dt = _find_buf_dtype(
+            x.dtype, self.result_type_resolver_fn_, x.sycl_device
+        )
+        if res_dt is None:
+            raise RuntimeError
+
+        orig_out = out
         if out is not None:
             if not isinstance(out, dpt.usm_ndarray):
                 raise TypeError(
@@ -64,8 +73,21 @@ def __call__(self, x, out=None, order="K"):
                     f"Expected output shape is {x.shape}, got {out.shape}"
                 )

-            if ti._array_overlap(x, out):
-                raise TypeError("Input and output arrays have memory overlap")
+            if res_dt != out.dtype:
+                raise TypeError(
+                    f"Output array of type {res_dt} is needed,"
+                    f" got {out.dtype}"
+                )
+
+            if (
+                buf_dt is None
+                and ti._array_overlap(x, out)
+                and not ti._same_logical_tensors(x, out)
+            ):
+                # Allocate a temporary buffer to avoid memory overlapping.
+                # Note if `buf_dt` is not None, a temporary copy of `x` will be
+                # created, so the array overlap check isn't needed.
+                out = dpt.empty_like(out)

             if (
                 dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue))
@@ -75,13 +97,6 @@ def __call__(self, x, out=None, order="K"):
                     "Input and output allocation queues are not compatible"
                )

-        if order not in ["C", "F", "K", "A"]:
-            order = "K"
-        buf_dt, res_dt = _find_buf_dtype(
-            x.dtype, self.result_type_resolver_fn_, x.sycl_device
-        )
-        if res_dt is None:
-            raise RuntimeError
         exec_q = x.sycl_queue
         if buf_dt is None:
             if out is None:
@@ -91,17 +106,20 @@ def __call__(self, x, out=None, order="K"):
                     if order == "A":
                         order = "F" if x.flags.f_contiguous else "C"
                     out = dpt.empty_like(x, dtype=res_dt, order=order)
-            else:
-                if res_dt != out.dtype:
-                    raise TypeError(
-                        f"Output array of type {res_dt} is needed,"
-                        f" got {out.dtype}"
-                    )

-            ht, _ = self.unary_fn_(x, out, sycl_queue=exec_q)
-            ht.wait()
+            ht_unary_ev, unary_ev = self.unary_fn_(x, out, sycl_queue=exec_q)
+
+            if not (orig_out is None or orig_out is out):
+                # Copy the out data from temporary buffer to original memory
+                ht_copy_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
+                    src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev]
+                )
+                ht_copy_ev.wait()
+                out = orig_out

+            ht_unary_ev.wait()
             return out
+
         if order == "K":
             buf = _empty_like_orderK(x, buf_dt)
         else:
@@ -117,11 +135,6 @@ def __call__(self, x, out=None, order="K"):
                 out = _empty_like_orderK(buf, res_dt)
             else:
                 out = dpt.empty_like(buf, dtype=res_dt, order=order)
-        else:
-            if buf_dt != out.dtype:
-                raise TypeError(
-                    f"Output array of type {buf_dt} is needed, got {out.dtype}"
-                )

         ht, _ = self.unary_fn_(buf, out, sycl_queue=exec_q, depends=[copy_ev])
         ht_copy_ev.wait()
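
A sketch of what the temporary-buffer handling above enables, assuming dpt.exp is one of the unary elementwise functions dispatched through this __call__ (previously an overlapping out raised TypeError):

import dpctl.tensor as dpt

x = dpt.linspace(0.0, 1.0, num=16)
res = dpt.exp(x, out=x)            # out is the same logical tensor: written in place
assert res is x

res = dpt.exp(x[:-1], out=x[1:])   # overlapping but distinct views: the result is
                                   # staged in a temporary and copied back into x[1:]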

dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp

Lines changed: 4 additions & 3 deletions
@@ -198,8 +198,9 @@ struct SequentialBooleanReduction
             // must convert to boolean first to handle nans
             using dpctl::tensor::type_utils::convert_impl;
             outT val = convert_impl<bool, argT>(inp_[inp_offset]);
+            ReductionOp op = reduction_op_;

-            red_val = reduction_op_(red_val, val);
+            red_val = op(red_val, val);
         }

         out_[out_iter_offset] = red_val;
@@ -452,9 +453,9 @@ struct StridedBooleanReduction
                 // must convert to boolean first to handle nans
                 using dpctl::tensor::type_utils::convert_impl;
                 bool val = convert_impl<bool, argT>(inp_[inp_offset]);
+                ReductionOp op = reduction_op_;

-                local_red_val =
-                    reduction_op_(local_red_val, static_cast<outT>(val));
+                local_red_val = op(local_red_val, static_cast<outT>(val));
             }
         }
         // reduction and atomic operations are performed

dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ template <typename argT, typename resT> struct ProjFunctor
         const realT y = std::imag(in);

         if (std::isinf(x) || std::isinf(y)) {
-            const realT res_im = std::copysign(0.0, y);
+            const realT res_im = std::copysign(realT(0), y);
             return resT{std::numeric_limits<realT>::infinity(), res_im};
         }
         return in;

dpctl/tensor/libtensor/include/utils/memory_overlap.hpp

Lines changed: 47 additions & 0 deletions
@@ -100,6 +100,53 @@ struct MemoryOverlap
     }
 };

+struct SameLogicalTensors
+{
+    bool operator()(dpctl::tensor::usm_ndarray ar1,
+                    dpctl::tensor::usm_ndarray ar2) const
+    {
+        // Same ndim
+        int nd1 = ar1.get_ndim();
+        if (nd1 != ar2.get_ndim())
+            return false;
+
+        // Same dtype
+        int tn1 = ar1.get_typenum();
+        if (tn1 != ar2.get_typenum())
+            return false;
+
+        // Same pointer
+        const char *ar1_data = ar1.get_data();
+        const char *ar2_data = ar2.get_data();
+
+        if (ar1_data != ar2_data)
+            return false;
+
+        // Same shape and strides
+        const py::ssize_t *ar1_shape = ar1.get_shape_raw();
+        const py::ssize_t *ar2_shape = ar2.get_shape_raw();
+
+        if (!std::equal(ar1_shape, ar1_shape + nd1, ar2_shape))
+            return false;
+
+        // Same shape and strides
+        auto const &ar1_strides = ar1.get_strides_vector();
+        auto const &ar2_strides = ar2.get_strides_vector();
+
+        auto ar1_beg_it = std::begin(ar1_strides);
+        auto ar1_end_it = std::end(ar1_strides);
+
+        auto ar2_beg_it = std::begin(ar2_strides);
+
+        if (!std::equal(ar1_beg_it, ar1_end_it, ar2_beg_it))
+            return false;
+
+        // all checks passed: arrays are logical views
+        // into the same memory
+        return true;
+    }
+};
+
 } // namespace overlap
 } // namespace tensor
 } // namespace dpctl

dpctl/tensor/libtensor/source/elementwise_functions.hpp

Lines changed: 3 additions & 1 deletion
@@ -128,7 +128,9 @@ py_unary_ufunc(dpctl::tensor::usm_ndarray src,

     // check memory overlap
     auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    if (overlap(src, dst)) {
+    auto const &same_logical_tensors =
+        dpctl::tensor::overlap::SameLogicalTensors();
+    if (overlap(src, dst) && !same_logical_tensors(src, dst)) {
         throw py::value_error("Arrays index overlapping segments of memory");
     }

dpctl/tensor/libtensor/source/tensor_py.cpp

Lines changed: 10 additions & 0 deletions
@@ -60,6 +60,7 @@ using dpctl::tensor::c_contiguous_strides;
 using dpctl::tensor::f_contiguous_strides;

 using dpctl::tensor::overlap::MemoryOverlap;
+using dpctl::tensor::overlap::SameLogicalTensors;

 using dpctl::tensor::py_internal::copy_usm_ndarray_into_usm_ndarray;

@@ -338,6 +339,15 @@ PYBIND11_MODULE(_tensor_impl, m)
           "Determines if the memory regions indexed by each array overlap",
           py::arg("array1"), py::arg("array2"));

+    auto same_logical_tensors = [](dpctl::tensor::usm_ndarray x1,
+                                   dpctl::tensor::usm_ndarray x2) -> bool {
+        auto const &same_logical_tensors = SameLogicalTensors();
+        return same_logical_tensors(x1, x2);
+    };
+    m.def("_same_logical_tensors", same_logical_tensors,
+          "Determines if the memory regions indexed by each array are the same",
+          py::arg("array1"), py::arg("array2"));
+
     m.def("_place", &py_place, "", py::arg("dst"), py::arg("cumsum"),
           py::arg("axis_start"), py::arg("axis_end"), py::arg("rhs"),
           py::arg("sycl_queue"), py::arg("depends") = py::list());
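
A quick sketch of the new private binding from Python; the behavior is inferred from the SameLogicalTensors checks above (same ndim, type, data pointer, shape and strides):

import dpctl.tensor as dpt
import dpctl.tensor._tensor_impl as ti

x = dpt.ones((4, 4), dtype="f4")
print(ti._same_logical_tensors(x, x))          # True: identical arrays
print(ti._same_logical_tensors(x, x[:, ::1]))  # True: same pointer, shape and strides
print(ti._same_logical_tensors(x, x.T))        # False: the transpose has different strides
print(ti._array_overlap(x, x.T))               # True: yet the memory regions still overlap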

dpctl/tests/_numpy_warnings.py

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy
+import pytest
+
+
+@pytest.fixture
+def suppress_invalid_numpy_warnings():
+    # invalid: treatment for invalid floating-point operation
+    # (result is not an expressible number, typically indicates
+    # that a NaN was produced)
+    old_settings = numpy.seterr(invalid="ignore")
+    yield
+    numpy.seterr(**old_settings)  # reset to default
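
A hypothetical test showing how the new fixture would be requested; the test body is illustrative and not part of this commit:

import numpy as np


def test_nan_producing_subtraction(suppress_invalid_numpy_warnings):
    # inf - inf yields NaN; with the fixture active, NumPy's
    # "invalid value encountered" RuntimeWarning is suppressed
    a = np.array([np.inf])
    assert np.isnan((a - a)[0])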

dpctl/tests/conftest.py

Lines changed: 8 additions & 1 deletion
@@ -26,8 +26,15 @@
     invalid_filter,
     valid_filter,
 )
+from _numpy_warnings import suppress_invalid_numpy_warnings

 sys.path.append(os.path.join(os.path.dirname(__file__), "helper"))

 # common fixtures
-__all__ = ["check", "device_selector", "invalid_filter", "valid_filter"]
+__all__ = [
+    "check",
+    "device_selector",
+    "invalid_filter",
+    "suppress_invalid_numpy_warnings",
+    "valid_filter",
+]
