IntelPython · ndgrigorian · Feb 14, 2024 · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024
@@ -1072,7 +1072,7 @@ bool queues_are_compatible(const sycl::queue &exec_q,
     return true;
 }
 
-/*! @brief Check if all allocation queues of  usm_ndarays are the same as
+/*! @brief Check if all allocation queues of usm_ndarrays are the same as
     the execution queue */
 template <std::size_t num>
 bool queues_are_compatible(const sycl::queue &exec_q,

@@ -0,0 +1,76 @@
+//===- output_validation.hpp - Utilities for output array validation
+//-*-C++-*===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines utilities for determining if an array is a valid output
+/// array.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+#include "dpctl4pybind11.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+
+namespace tensor
+{
+
+namespace validation
+{
+
+/*! @brief Guard for arrays that are about to be written to.
+
+    throw_if_not_writable raises py::value_error when arr.is_writable()
+    reports false and returns normally otherwise. Invoke it on an output
+    array before any write is issued. */
+struct CheckWritable
+{
+    static void throw_if_not_writable(const dpctl::tensor::usm_ndarray &arr)
+    {
+        const bool writable = arr.is_writable();
+        if (writable) {
+            // nothing to report, the array may be used as an output
+            return;
+        }
+        throw py::value_error("output array is read-only.");
+    }
+};
+
+/*! @brief Guard ensuring an output array addresses enough memory.
+
+    throw_if_not_ample takes the difference between the maximum and the
+    minimum offsets reported by arr.get_minmax_offsets(), converts it to T,
+    and raises py::value_error when that difference plus one is smaller
+    than nelems. It returns normally otherwise.
+
+    The difference is cast to T before the comparison, so T should be an
+    arithmetic type wide enough to hold it. Invoke on an output array
+    before any write is issued. */
+struct AmpleMemory
+{
+    template <typename T>
+    static void throw_if_not_ample(const dpctl::tensor::usm_ndarray &arr,
+                                   T nelems)
+    {
+        const auto minmax = arr.get_minmax_offsets();
+        // distance between the extreme offsets, expressed in the caller's
+        // element-count type
+        const T span = static_cast<T>(minmax.second - minmax.first);
+        const bool too_small = (span + 1 < nelems);
+        if (!too_small) {
+            return;
+        }
+        throw py::value_error("Memory addressed by the output array is not "
+                              "sufficiently ample.");
+    }
+};
+
+} // namespace validation
+} // namespace tensor
+} // namespace dpctl
@@ -35,6 +35,7 @@
 #include "simplify_iteration_space.hpp"
 #include "utils/memory_overlap.hpp"
 #include "utils/offset_utils.hpp"
+#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 
 namespace dpctl
@@ -102,6 +103,8 @@ size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask,
                          sycl::queue &exec_q,
                          const std::vector<sycl::event> &depends)
 {
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(cumsum);
+
     // cumsum is 1D
     if (cumsum.get_ndim() != 1) {
         throw py::value_error("Result array must be one-dimensional.");
@@ -274,6 +277,8 @@ size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src,
             "Execution queue is not compatible with allocation queues");
     }
 
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(cumsum);
+
     if (src_size == 0) {
         return 0;
     }

@@ -37,6 +37,7 @@
 #include "simplify_iteration_space.hpp"
 #include "utils/memory_overlap.hpp"
 #include "utils/offset_utils.hpp"
+#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 
 namespace dpctl
@@ -118,6 +119,8 @@ py_extract(const dpctl::tensor::usm_ndarray &src,
            sycl::queue &exec_q,
            const std::vector<sycl::event> &depends)
 {
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+
     int src_nd = src.get_ndim();
     if ((axis_start < 0 || axis_end > src_nd || axis_start >= axis_end)) {
         throw py::value_error("Specified axes_start and axes_end are invalid.");
@@ -171,19 +174,8 @@ py_extract(const dpctl::tensor::usm_ndarray &src,
         throw py::value_error("Inconsistent array dimensions");
     }
 
-    // ensure that dst is sufficiently ample
-    auto dst_offsets = dst.get_minmax_offsets();
-    // destination must be ample enough to accommodate all elements
-    {
-        size_t range =
-            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < static_cast<size_t>(ortho_nelems * masked_dst_nelems)) {
-            throw py::value_error(
-                "Memory addressed by the destination array can not "
-                "accommodate all the "
-                "array elements.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(
+        dst, ortho_nelems * masked_dst_nelems);
 
     auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
     // check that dst does not intersect with src, not with cumsum.
@@ -452,6 +444,8 @@ py_place(const dpctl::tensor::usm_ndarray &dst,
          sycl::queue &exec_q,
          const std::vector<sycl::event> &depends)
 {
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+
     int dst_nd = dst.get_ndim();
     if ((axis_start < 0 || axis_end > dst_nd || axis_start >= axis_end)) {
         throw py::value_error("Specified axes_start and axes_end are invalid.");
@@ -502,19 +496,8 @@ py_place(const dpctl::tensor::usm_ndarray &dst,
         throw py::value_error("Inconsistent array dimensions");
     }
 
-    // ensure that dst is sufficiently ample
-    auto dst_offsets = dst.get_minmax_offsets();
-    // destination must be ample enough to accommodate all elements
-    {
-        size_t range =
-            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < static_cast<size_t>(ortho_nelems * masked_dst_nelems)) {
-            throw py::value_error(
-                "Memory addressed by the destination array can not "
-                "accommodate all the "
-                "array elements.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(
+        dst, ortho_nelems * masked_dst_nelems);
 
     auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
     // check that dst does not intersect with src, not with cumsum.
@@ -726,6 +709,8 @@ py_nonzero(const dpctl::tensor::usm_ndarray
             "Execution queue is not compatible with allocation queues");
     }
 
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(indexes);
+
     int cumsum_nd = cumsum.get_ndim();
     if (cumsum_nd != 1 || !cumsum.is_c_contiguous()) {
         throw py::value_error("Cumsum array must be a C-contiguous vector");
@@ -787,18 +772,8 @@ py_nonzero(const dpctl::tensor::usm_ndarray
         throw py::value_error("Arrays are expected to ave no memory overlap");
     }
 
-    // ensure that dst is sufficiently ample
-    auto indexes_offsets = indexes.get_minmax_offsets();
-    // destination must be ample enough to accommodate all elements
-    {
-        size_t range =
-            static_cast<size_t>(indexes_offsets.second - indexes_offsets.first);
-        if (range + 1 < static_cast<size_t>(nz_elems * _ndim)) {
-            throw py::value_error(
-                "Memory addressed by the destination array can not "
-                "accommodate all the array elements.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(
+        indexes, nz_elems * _ndim);
 
     std::vector<sycl::event> host_task_events;
     host_task_events.reserve(2);

@@ -37,6 +37,7 @@
 #include "simplify_iteration_space.hpp"
 #include "utils/memory_overlap.hpp"
 #include "utils/offset_utils.hpp"
+#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 
 namespace dpctl
@@ -87,6 +88,8 @@ py_clip(const dpctl::tensor::usm_ndarray &src,
             "Execution queue is not compatible with allocation queues");
     }
 
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+
     int nd = src.get_ndim();
     int min_nd = min.get_ndim();
     int max_nd = max.get_ndim();
@@ -152,19 +155,7 @@ py_clip(const dpctl::tensor::usm_ndarray &src,
                               "have the same data type");
     }
 
-    // ensure that dst is sufficiently ample
-    auto dst_offsets = dst.get_minmax_offsets();
-    // destination must be ample enough to accommodate all elements
-    {
-        size_t range =
-            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < static_cast<size_t>(nelems)) {
-            throw py::value_error(
-                "Memory addressed by the destination array can not "
-                "accommodate all the "
-                "array elements.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, nelems);
 
     char *src_data = src.get_data();
     char *min_data = min.get_data();

@@ -37,6 +37,7 @@
 #include "dpctl4pybind11.hpp"
 #include "kernels/copy_and_cast.hpp"
 #include "utils/memory_overlap.hpp"
+#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 
@@ -100,24 +101,16 @@ copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src,
         return std::make_pair(sycl::event(), sycl::event());
     }
 
-    // destination must be ample enough to accommodate all elements
-    {
-        auto dst_offsets = dst.get_minmax_offsets();
-        size_t range =
-            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < src_nelems) {
-            throw py::value_error(
-                "Destination array can not accommodate all the "
-                "elements of source array.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
 
     // check compatibility of execution queue and allocation queue
     if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
         throw py::value_error(
             "Execution queue is not compatible with allocation queues");
     }
 
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+
     int src_typenum = src.get_typenum();
     int dst_typenum = dst.get_typenum();
 

@@ -29,6 +29,7 @@
 #include "copy_for_reshape.hpp"
 #include "dpctl4pybind11.hpp"
 #include "kernels/copy_and_cast.hpp"
+#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 #include <pybind11/pybind11.h>
 
@@ -87,24 +88,16 @@ copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src,
         return std::make_pair(sycl::event(), sycl::event());
     }
 
-    // destination must be ample enough to accommodate all elements
-    {
-        auto dst_offsets = dst.get_minmax_offsets();
-        py::ssize_t range =
-            static_cast<py::ssize_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < src_nelems) {
-            throw py::value_error(
-                "Destination array can not accommodate all the "
-                "elements of source array.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
 
     // check same contexts
     if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
         throw py::value_error(
             "Execution queue is not compatible with allocation queues");
     }
 
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+
     if (src_nelems == 1) {
         // handle special case of 1-element array
         int src_elemsize = src.get_elemsize();

@@ -29,6 +29,7 @@
 #include "copy_for_roll.hpp"
 #include "dpctl4pybind11.hpp"
 #include "kernels/copy_and_cast.hpp"
+#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 #include <pybind11/pybind11.h>
 
@@ -110,24 +111,16 @@ copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src,
         return std::make_pair(sycl::event(), sycl::event());
     }
 
-    // destination must be ample enough to accommodate all elements
-    {
-        auto dst_offsets = dst.get_minmax_offsets();
-        py::ssize_t range =
-            static_cast<py::ssize_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < src_nelems) {
-            throw py::value_error(
-                "Destination array can not accommodate all the "
-                "elements of source array.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
 
     // check same contexts
     if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
         throw py::value_error(
             "Execution queue is not compatible with allocation queues");
     }
 
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+
     if (src_nelems == 1) {
         // handle special case of 1-element array
         int src_elemsize = src.get_elemsize();
@@ -298,17 +291,7 @@ copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src,
         return std::make_pair(sycl::event(), sycl::event());
     }
 
-    // destination must be ample enough to accommodate all elements
-    {
-        auto dst_offsets = dst.get_minmax_offsets();
-        py::ssize_t range =
-            static_cast<py::ssize_t>(dst_offsets.second - dst_offsets.first);
-        if (range + 1 < src_nelems) {
-            throw py::value_error(
-                "Destination array can not accommodate all the "
-                "elements of source array.");
-        }
-    }
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
 
     // check for compatible queues
     if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {