@@ -164,6 +164,126 @@ struct slice_area {
};
};

+
+ // A way to get value_type from both accessors and USM pointers; transform_init needs it.
+ template <typename Unknown>
+ struct accessor_traits
+ {
+ };
+
+ template <typename T, int Dim, sycl::access::mode AccMode, sycl::access::target AccTarget,
+           sycl::access::placeholder Placeholder>
+ struct accessor_traits<sycl::accessor<T, Dim, AccMode, AccTarget, Placeholder>>
+ {
+   using value_type = typename sycl::accessor<T, Dim, AccMode, AccTarget, Placeholder>::value_type;
+ };
+
+ template <typename RawArrayValueType>
+ struct accessor_traits<RawArrayValueType*>
+ {
+   using value_type = RawArrayValueType;
+ };
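// Illustrative only (not part of this commit): a compile-time sketch of what
// accessor_traits resolves to for a raw USM pointer and for a buffer accessor.
// It assumes <type_traits> and the SYCL header are already included by the sample,
// and that the accessor uses the classic access::target/placeholder enumerators.
static_assert(std::is_same<accessor_traits<float*>::value_type, float>::value,
              "raw USM pointers expose their element type");
static_assert(std::is_same<accessor_traits<sycl::accessor<
                               float, 1, sycl::access::mode::read_write,
                               sycl::access::target::global_buffer,
                               sycl::access::placeholder::false_t>>::value_type,
                           float>::value,
              "buffer accessors expose their value_type");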
+
+ // Calculate the shift at which the current work-item should start processing.
+ template <typename NDItemId, typename GlobalIdx, typename SizeNIter, typename SizeN>
+ SizeN
+ calc_shift(const NDItemId item_id, const GlobalIdx global_idx, SizeNIter& n_iter, const SizeN n)
+ {
+   auto global_range_size = item_id.get_global_range().size();
+
+   auto start = n_iter * global_idx;
+   auto global_shift = global_idx + n_iter * global_range_size;
+   if (n_iter > 0 && global_shift > n)
+   {
+     start += n % global_range_size - global_idx;
+   }
+   else if (global_shift < n)
+   {
+     n_iter++;
+   }
+   return start;
+ }
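// Illustrative only (not part of this commit): a host-only sketch that mirrors the
// calc_shift arithmetic with plain integers, so the work distribution can be checked
// without a device. With a global range of 8 "work-items" and n = 20, items 0-3 end up
// with 3 elements each and items 4-7 with 2, covering indices 0..19 exactly once.
// The names shift_for and range are hypothetical and exist only in this sketch.
#include <cstddef>
#include <cstdio>

static std::size_t shift_for(std::size_t global_idx, std::size_t global_range_size,
                             std::size_t n, std::size_t& n_iter) {
  std::size_t start = n_iter * global_idx;
  std::size_t global_shift = global_idx + n_iter * global_range_size;
  if (n_iter > 0 && global_shift > n)
    start += n % global_range_size - global_idx;  // tail items: unsigned wrap-around still yields the smaller start
  else if (global_shift < n)
    n_iter++;  // this item picks up one extra element
  return start;
}

int main() {
  const std::size_t n = 20, range = 8;
  for (std::size_t idx = 0; idx < range; ++idx) {
    std::size_t n_iter = n / range;  // seeded exactly as transform_init does below
    std::size_t start = shift_for(idx, range, n, n_iter);
    std::printf("item %zu: first index %zu, count %zu\n", idx, idx + start, n_iter);
  }
  return 0;
}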
+
+
+ template <typename ExecutionPolicy, typename Operation1, typename Operation2>
+ struct transform_init
+ {
+   Operation1 binary_op;
+   Operation2 unary_op;
+
+   template <typename NDItemId, typename GlobalIdx, typename Size, typename AccLocal, typename... Acc>
+   void
+   operator()(const NDItemId item_id, const GlobalIdx global_idx, Size n, AccLocal& local_mem,
+              const Acc&... acc)
+   {
+     auto local_idx = item_id.get_local_id(0);
+     auto global_range_size = item_id.get_global_range().size();
+     auto n_iter = n / global_range_size;
+     auto start = calc_shift(item_id, global_idx, n_iter, n);
+     auto shifted_global_idx = global_idx + start;
+
+     typename accessor_traits<AccLocal>::value_type res;
+     if (global_idx < n)
+     {
+       res = unary_op(shifted_global_idx, acc...);
+     }
+     // Add neighbour to the current local_mem
+     for (decltype(n_iter) i = 1; i < n_iter; ++i)
+     {
+       res = binary_op(res, unary_op(shifted_global_idx + i, acc...));
+     }
+     if (global_idx < n)
+     {
+       local_mem[local_idx] = res;
+     }
+   }
+ };
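// Editorial note (not part of this commit): transform_init is the first stage of the
// reduction. Each work-item applies unary_op to the n_iter elements that calc_shift
// assigned to it, folds them with binary_op into a single value, and stores that
// partial result in local_mem[local_idx]. The reduce functor defined below then
// combines these per-item partials within each work-group.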
+
+
+ // Reduce on local memory
+ template <typename ExecutionPolicy, typename BinaryOperation1, typename Tp>
+ struct reduce
+ {
+   BinaryOperation1 bin_op1;
+
+   template <typename NDItemId, typename GlobalIdx, typename Size, typename AccLocal>
+   Tp
+   operator()(const NDItemId item_id, const GlobalIdx global_idx, const Size n, AccLocal& local_mem)
+   {
+     auto local_idx = item_id.get_local_id(0);
+     auto group_size = item_id.get_local_range().size();
+
+     auto k = 1;
+     do
+     {
+       item_id.barrier(sycl::access::fence_space::local_space);
+       if (local_idx % (2 * k) == 0 && local_idx + k < group_size && global_idx < n &&
+           global_idx + k < n)
+       {
+         local_mem[local_idx] = bin_op1(local_mem[local_idx], local_mem[local_idx + k]);
+       }
+       k *= 2;
+     } while (k < group_size);
+     return local_mem[local_idx];
+   }
+ };
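// Illustration (not part of this commit) of the strided tree reduction above for a
// full work-group of 8 work-items, assuming global_idx + k < n throughout, where
// "x += y" stands for x = bin_op1(x, y):
//   k = 1: local_mem[0] += local_mem[1], [2] += [3], [4] += [5], [6] += [7]
//   k = 2: local_mem[0] += local_mem[2], [4] += [6]
//   k = 4: local_mem[0] += local_mem[4]
// When the loop exits (k == group_size) the group's combined value sits in local_mem[0].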
+
+
+ // Walk through the data
+ template <typename ExecutionPolicy, typename F>
+ struct walk_n
+ {
+   F f;
+
+   template <typename ItemId, typename... Ranges>
+   auto
+   operator()(const ItemId idx, Ranges&&... rngs) -> decltype(f(rngs[idx]...))
+   {
+     return f(rngs[idx]...);
+   }
+ };
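// Illustrative only (not part of this commit): walk_n simply forwards element idx of
// each range to f, so f sees values rather than iterators. The square functor,
// dummy_policy tag, and walk_n_demo below are hypothetical; the ExecutionPolicy
// parameter is only a tag here and is never instantiated.
struct square {
  float operator()(float x) const { return x * x; }
};
struct dummy_policy {};

inline float walk_n_demo() {
  float data[4] = {1.f, 2.f, 3.f, 4.f};
  walk_n<dummy_policy, square> w{square{}};
  return w(2, data);  // returns f(data[2]) == 9.f
}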
+
+
// This option uses a parallel for to fill the buffer, then a
// transform_init with plus/no_op, then a local reduction
// followed by a global reduction.
@@ -189,21 +309,18 @@ float calc_pi_dpstd_native3(size_t num_steps, int groups, Policy&& policy) {
  auto calc_begin = oneapi::dpl::begin(buf);
  auto calc_end = oneapi::dpl::end(buf);

-  using Functor = oneapi::dpl::unseq_backend::walk_n<Policy, my_no_op>;
+  using Functor = walk_n<Policy, my_no_op>;
  float result;

  // Functor will do nothing for transform_init and will use plus for reduce.
  // In this example we have done the calculation and filled the buffer above.
  // The way transform_init works is that you need to have the value already
  // populated in the buffer.
-  auto tf_init =
-      oneapi::dpl::unseq_backend::transform_init<Policy, std::plus<float>,
-                                                 Functor>{std::plus<float>(),
-                                                          Functor{my_no_op()}};
+  auto tf_init = transform_init<Policy, std::plus<float>,
+                                Functor>{std::plus<float>(), Functor{my_no_op()}};

  auto combine = std::plus<float>();
-  auto brick_reduce =
-      oneapi::dpl::unseq_backend::reduce<Policy, std::plus<float>, float>{
+  auto brick_reduce = reduce<Policy, std::plus<float>, float>{
      std::plus<float>()};
  auto workgroup_size =
      policy.queue()
@@ -295,19 +412,17 @@ float calc_pi_dpstd_native4(size_t num_steps, int groups, Policy&& policy) {
  auto calc_begin = oneapi::dpl::begin(buf2);
  auto calc_end = oneapi::dpl::end(buf2);

-  using Functor2 = oneapi::dpl::unseq_backend::walk_n<Policy, slice_area>;
+  using Functor2 = walk_n<Policy, slice_area>;

  // The buffer has 1...num in it, and we now use that as the input to the
  // slice_area structure, which calculates the area of each
  // rectangle.
-  auto tf_init =
-      oneapi::dpl::unseq_backend::transform_init<Policy, std::plus<float>,
+  auto tf_init = transform_init<Policy, std::plus<float>,
                                 Functor2>{
      std::plus<float>(), Functor2{slice_area(num_steps)}};

  auto combine = std::plus<float>();
-  auto brick_reduce =
-      oneapi::dpl::unseq_backend::reduce<Policy, std::plus<float>, float>{
+  auto brick_reduce = reduce<Policy, std::plus<float>, float>{
      std::plus<float>()};

  // get workgroup_size from the device