intel · sommerlukas · Mar 11, 2024 · Mar 5, 2024 · Mar 7, 2024 · Mar 8, 2024
@@ -1048,16 +1048,6 @@ Functionality is provided to represent a pair of integers as a `double`.
 in the high & low 32-bits respectively. `cast_double_to_int` casts the high or
 low 32-bits back into an integer.
 
-`syclcompat::fast_length` provides a wrapper to SYCL's
-`fast_length(sycl::vec<float,N>)` that accepts arguments for a C++ array and a
-length.
-
-`vectorized_max` and `vectorized_min` are binary operations returning the
-max/min of two arguments, where each argument is treated as a `sycl::vec` type.
-`vectorized_isgreater` performs elementwise `isgreater`, treating each argument
-as a vector of elements, and returning `0` for vector components for which
-`isgreater` is false, and `-1` when true.
-
 `reverse_bits` reverses the bits of a 32-bit unsigned integer, `ffs` returns the
 position of the first least significant set bit in an integer.
 `byte_level_permute` returns a byte-permutation of two input unsigned integers,
@@ -1073,61 +1063,34 @@ functionality to `sycl::select_from_group`, `sycl::shift_group_left`,
 However, they provide an optional argument to represent the `logical_group` size
 (default 32).
 
-The functions `cmul`,`cdiv`,`cabs`, and `conj` define complex math operations
-which accept `sycl::vec<T,2>` arguments representing complex values.
-
 ```c++
 namespace syclcompat {
 
 inline int cast_double_to_int(double d, bool use_high32 = true);
 
 inline double cast_ints_to_double(int high32, int low32);
 
-inline float fast_length(const float *a, int len);
-
-template <typename S, typename T> inline T vectorized_max(T a, T b);
-
-template <typename S, typename T> inline T vectorized_min(T a, T b);
-
-template <typename S, typename T> inline T vectorized_isgreater(T a, T b);
-
-template <>
-inline unsigned vectorized_isgreater<sycl::ushort2, unsigned>(unsigned a,
-                                                              unsigned b);
-
-template <typename T> inline T reverse_bits(T a);
-
 inline unsigned int byte_level_permute(unsigned int a, unsigned int b,
                                        unsigned int s);
 
-template <typename T> inline int ffs(T a);
+template <typename ValueT> inline int ffs(ValueT a);
 
-template <typename T>
-T select_from_sub_group(sycl::sub_group g, T x, int remote_local_id,
+template <typename ValueT>
+ValueT select_from_sub_group(sycl::sub_group g, ValueT x, int remote_local_id,
                         int logical_sub_group_size = 32);
 
-template <typename T>
-T shift_sub_group_left(sycl::sub_group g, T x, unsigned int delta,
+template <typename ValueT>
+ValueT shift_sub_group_left(sycl::sub_group g, ValueT x, unsigned int delta,
                        int logical_sub_group_size = 32);
 
-template <typename T>
-T shift_sub_group_right(sycl::sub_group g, T x, unsigned int delta,
+template <typename ValueT>
+ValueT shift_sub_group_right(sycl::sub_group g, ValueT x, unsigned int delta,
                         int logical_sub_group_size = 32);
 
-template <typename T>
-T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
+template <typename ValueT>
+ValueT permute_sub_group_by_xor(sycl::sub_group g, ValueT x, unsigned int mask,
                            int logical_sub_group_size = 32);
 
-template <typename T>
-sycl::vec<T, 2> cmul(sycl::vec<T, 2> x, sycl::vec<T, 2> y);
-
-template <typename T>
-sycl::vec<T, 2> cdiv(sycl::vec<T, 2> x, sycl::vec<T, 2> y);
-
-template <typename T> T cabs(sycl::vec<T, 2> x);
-
-template <typename T> sycl::vec<T, 2> conj(sycl::vec<T, 2> x);
-
 } // namespace syclcompat
 ```
 
@@ -1191,7 +1154,7 @@ int get_sycl_language_version();
 } // namespace syclcompat
 ```
 
-#### Kernel Helper Functions
+### Kernel Helper Functions
 
 Kernel helper functions provide a structure `kernel_function_info` to keep SYCL
 kernel information, and provide a utility function `get_kernel_function_info()`
@@ -1212,6 +1175,47 @@ static kernel_function_info get_kernel_function_info(const void *function);
 } // namespace syclcompat
 ```
 
+### Math Functions
+
+`syclcompat::fast_length` provides a wrapper to SYCL's
+`fast_length(sycl::vec<float,N>)` that accepts arguments for a C++ array and a
+length.
+
+`vectorized_max` and `vectorized_min` are binary operations returning the
+max/min of two arguments, where each argument is treated as a `sycl::vec` type.
+`vectorized_isgreater` performs elementwise `isgreater`, treating each argument
+as a vector of elements, and returning `0` for vector components for which
+`isgreater` is false, and `-1` when true.
+
+The functions `cmul`,`cdiv`,`cabs`, and `conj` define complex math operations
+which accept `sycl::vec<T,2>` arguments representing complex values.
+
+```cpp
+inline float fast_length(const float *a, int len);
+
+template <typename S, typename T> inline T vectorized_max(T a, T b);
+
+template <typename S, typename T> inline T vectorized_min(T a, T b);
+
+template <typename S, typename T> inline T vectorized_isgreater(T a, T b);
+
+template <>
+inline unsigned vectorized_isgreater<sycl::ushort2, unsigned>(unsigned a,
+                                                              unsigned b);
+
+template <typename T>
+sycl::vec<T, 2> cmul(sycl::vec<T, 2> x, sycl::vec<T, 2> y);
+
+template <typename T>
+sycl::vec<T, 2> cdiv(sycl::vec<T, 2> x, sycl::vec<T, 2> y);
+
+template <typename T> T cabs(sycl::vec<T, 2> x);
+
+template <typename T> sycl::vec<T, 2> conj(sycl::vec<T, 2> x);
+
+template <typename ValueT> inline ValueT reverse_bits(ValueT a);
+```
+
 ## Sample Code
 
 Below is a simple linear algebra sample, which computes `y = mx + b` implemented
@@ -1311,7 +1315,7 @@ int main(int argc, char **argv) {
 
   // Check output
   for (size_t i = 0; i < n_points; i++) {
-    assert(h_Y[i] - h_expected[i] < 1e6);
+    assert(h_Y[i] - h_expected[i] < 1e-6);
   }
 
   // Clean up memory

@@ -0,0 +1,185 @@
+/***************************************************************************
+ *
+ *  Copyright (C) Codeplay Software Ltd.
+ *
+ *  Part of the LLVM Project, under the Apache License v2.0 with LLVM
+ *  Exceptions. See https://llvm.org/LICENSE.txt for license information.
+ *  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  SYCL compatibility extension
+ *
+ *  math.hpp
+ *
+ *  Description:
+ *    math utilities for the SYCL compatibility extension.
+ **************************************************************************/
+
+// The original source was under the license below:
+//==---- math.hpp ---------------------------------*- C++ -*----------------==//
+//
+// Copyright (C) Intel Corporation
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <sycl/sycl.hpp>
+
+#ifndef SYCL_EXT_ONEAPI_COMPLEX
+#define SYCL_EXT_ONEAPI_COMPLEX
+#endif
+
+#include <sycl/ext/oneapi/experimental/complex/complex.hpp>
+
+namespace syclcompat {
+namespace detail {
+
+namespace complex_namespace = sycl::ext::oneapi::experimental;
+
+template <typename ValueT>
+using complex_type = detail::complex_namespace::complex<ValueT>;
+
+} // namespace detail
+
+/// Compute fast_length for variable-length array
+/// \param [in] a The array
+/// \param [in] len Length of the array
+/// \returns The computed fast_length
+inline float fast_length(const float *a, int len) {
+  switch (len) {
+  case 1:
+    return sycl::fast_length(a[0]);
+  case 2:
+    return sycl::fast_length(sycl::float2(a[0], a[1]));
+  case 3:
+    return sycl::fast_length(sycl::float3(a[0], a[1], a[2]));
+  case 4:
+    return sycl::fast_length(sycl::float4(a[0], a[1], a[2], a[3]));
+  case 0:
+    return 0;
+  default:
+    float f = 0;
+    for (int i = 0; i < len; ++i)
+      f += a[i] * a[i];
+    return sycl::sqrt(f);
+  }
+}
+
+/// Compute vectorized max for two values, with each value treated as a vector
+/// type \p S
+/// \param [in] S The type of the vector
+/// \param [in] T The type of the original values
+/// \param [in] a The first value
+/// \param [in] b The second value
+/// \returns The vectorized max of the two values
+template <typename S, typename T> inline T vectorized_max(T a, T b) {
+  sycl::vec<T, 1> v0{a}, v1{b};
+  auto v2 = v0.template as<S>();
+  auto v3 = v1.template as<S>();
+  v2 = sycl::max(v2, v3);
+  v0 = v2.template as<sycl::vec<T, 1>>();
+  return v0;
+}
+
+/// Compute vectorized min for two values, with each value treated as a vector
+/// type \p S
+/// \param [in] S The type of the vector
+/// \param [in] T The type of the original values
+/// \param [in] a The first value
+/// \param [in] b The second value
+/// \returns The vectorized min of the two values
+template <typename S, typename T> inline T vectorized_min(T a, T b) {
+  sycl::vec<T, 1> v0{a}, v1{b};
+  auto v2 = v0.template as<S>();
+  auto v3 = v1.template as<S>();
+  v2 = sycl::min(v2, v3);
+  v0 = v2.template as<sycl::vec<T, 1>>();
+  return v0;
+}
+
+/// Compute vectorized isgreater for two values, with each value treated as a
+/// vector type \p S
+/// \param [in] S The type of the vector
+/// \param [in] T The type of the original values
+/// \param [in] a The first value
+/// \param [in] b The second value
+/// \returns The vectorized greater than of the two values
+template <typename S, typename T> inline T vectorized_isgreater(T a, T b) {
+  sycl::vec<T, 1> v0{a}, v1{b};
+  auto v2 = v0.template as<S>();
+  auto v3 = v1.template as<S>();
+  auto v4 = sycl::isgreater(v2, v3);
+  v0 = v4.template as<sycl::vec<T, 1>>();
+  return v0;
+}
+
+/// Compute vectorized isgreater for two unsigned int values, with each value
+/// treated as a vector of two unsigned short
+/// \param [in] a The first value
+/// \param [in] b The second value
+/// \returns The vectorized greater than of the two values
+template <>
+inline unsigned vectorized_isgreater<sycl::ushort2, unsigned>(unsigned a,
+                                                              unsigned b) {
+  sycl::vec<unsigned, 1> v0{a}, v1{b};
+  auto v2 = v0.template as<sycl::ushort2>();
+  auto v3 = v1.template as<sycl::ushort2>();
+  sycl::ushort2 v4;
+  v4[0] = v2[0] > v3[0];
+  v4[1] = v2[1] > v3[1];
+  v0 = v4.template as<sycl::vec<unsigned, 1>>();
+  return v0;
+}
+
+/// Computes the multiplication of two complex numbers.
+/// \tparam T Complex element type
+/// \param [in] x The first input complex number
+/// \param [in] y The second input complex number
+/// \returns The result
+template <typename T>
+sycl::vec<T, 2> cmul(sycl::vec<T, 2> x, sycl::vec<T, 2> y) {
+  sycl::ext::oneapi::experimental::complex<T> t1(x[0], x[1]), t2(y[0], y[1]);
+  t1 = t1 * t2;
+  return sycl::vec<T, 2>(t1.real(), t1.imag());
+}
+
+/// Computes the division of two complex numbers.
+/// \tparam T Complex element type
+/// \param [in] x The first input complex number
+/// \param [in] y The second input complex number
+/// \returns The result
+template <typename T>
+sycl::vec<T, 2> cdiv(sycl::vec<T, 2> x, sycl::vec<T, 2> y) {
+  sycl::ext::oneapi::experimental::complex<T> t1(x[0], x[1]), t2(y[0], y[1]);
+  t1 = t1 / t2;
+  return sycl::vec<T, 2>(t1.real(), t1.imag());
+}
+
+/// Computes the magnitude of a complex number.
+/// \tparam T Complex element type
+/// \param [in] x The input complex number
+/// \returns The result
+template <typename T> T cabs(sycl::vec<T, 2> x) {
+  sycl::ext::oneapi::experimental::complex<T> t(x[0], x[1]);
+  return abs(t);
+}
+
+/// Computes the complex conjugate of a complex number.
+/// \tparam T Complex element type
+/// \param [in] x The input complex number
+/// \returns The result
+template <typename T> sycl::vec<T, 2> conj(sycl::vec<T, 2> x) {
+  sycl::ext::oneapi::experimental::complex<T> t(x[0], x[1]);
+  t = conj(t);
+  return sycl::vec<T, 2>(t.real(), t.imag());
+}
+
+} // namespace syclcompat
@@ -29,5 +29,6 @@
 #include <syclcompat/id_query.hpp>
 #include <syclcompat/kernel.hpp>
 #include <syclcompat/launch.hpp>
+#include <syclcompat/math.hpp>
 #include <syclcompat/memory.hpp>
 #include <syclcompat/util.hpp>