Commit 6e33836

terdner and JoeOster authored
Updated coding style to use accessor instead of get_access calls (#169)
* Initial commit of the openMP example.
* Initial commit of the dpc_reduce sample.
* Added a GUID to sample.json.
* Fixed the sample.json files.
* Fixed the include files; a slightly old repo had been copied, so <chrono> and the omp_common.hpp file were still present. They have been removed.
* Added the license.txt file, ran the code through the formatting tool one more time, and replaced all calls to "std::endl" with "\n".
* Renamed license.txt to License.txt.
* Added "ciTests" to the sample.json file; it passed the check.
* Fixed a make error.
* Fixed sample.json.
* Removed "2020" from the License.txt file per the updated guidelines.
* Added a comment to both files noting where dpc_common can be found, per Paul's comments.
* Renamed the functions to describe what they do (i.e., calc_pi_*), per a suggestion from Paul.
* Initial check-in to the C++ repo.
* Put the correct comment on dpc_common.hpp.
* Added comments indicating where the corresponding include files can be found.
* Added a comment line.
* Removed the openMP example from the DPC++ directory, as it will be moved to the C++ directory.
* Updated README.md.
* Fixed the category line in sample.json to match the exact text expected.
* Removed openMP from the DPC directory; it has been moved to the C++ directory.
* Fixed the tf_init call.
* Removed all calls into PSTL internal logic; these were causing failures between beta08 and beta09.
* Fixed the environment variable needed to run on the CPU.
* Updated the README with information about the environment variable that allocates more memory for runs on the CPU.
* Added an option in the CMake file to enable unnamed lambdas, which is needed to compile when the environment does not set it by default.
* Updated the path to the compiler output; the build no longer creates the src directory.
* Started replacing get_access calls with named accessor objects.
* Fixed the remaining get_access calls.
* Removed commented-out old code.

Signed-off-by: todd.erdner <[email protected]>
Co-authored-by: JoeOster <[email protected]>
1 parent: 8a0473d

File tree

1 file changed: +27 additions, -31 deletions

  • DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/main.cpp
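
The pattern at the heart of this commit, shown outside the diff for clarity: every explicit buf.get_access<access::mode::X>(h) call becomes a SYCL 2020 accessor constructed with class template argument deduction and an access-mode tag such as write_only. A minimal sketch — fill_buffer and its parameters are hypothetical stand-ins for the sample's own variables, and it assumes a SYCL 2020-capable DPC++ compiler:

#include <CL/sycl.hpp>
using namespace sycl;

// Hypothetical helper illustrating the accessor style this commit adopts.
void fill_buffer(queue& q, buffer<float, 1>& buf, size_t num_steps) {
  q.submit([&](handler& h) {
    // Old style, removed by this commit:
    //   auto writeresult = buf.get_access<access::mode::write>(h);
    // New style: CTAD plus the write_only tag.
    accessor writeresult(buf, h, write_only);
    h.parallel_for(range<1>{num_steps},
                   [=](id<1> idx) { writeresult[idx] = 0.0f; });
  });
}

The tag (write_only, read_only, or none for read-write) replaces the access::mode template argument, so the deduced accessor type matches what get_access produced.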

DirectProgramming/DPC++/ParallelPatterns/dpc_reduce/src/main.cpp
Lines changed: 27 additions & 31 deletions

@@ -4,7 +4,6 @@
 // SPDX-License-Identifier: MIT
 // =============================================================
 #include <mpi.h>
-
 #include <CL/sycl.hpp>
 #include <iomanip>  // setprecision library
 #include <iostream>
@@ -72,7 +71,7 @@ float calc_pi_dpstd_native(size_t num_steps, Policy&& policy) {
   buffer<float, 1> buf{data, range<1>{num_steps}};

   policy.queue().submit([&](handler& h) {
-    auto writeresult = buf.get_access<access::mode::write>(h);
+    accessor writeresult(buf, h, write_only);
     h.parallel_for(range<1>{num_steps}, [=](id<1> idx) {
       float x = ((float)idx[0] - 0.5) / (float)num_steps;
       writeresult[idx[0]] = 4.0f / (1.0 + x * x);
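
For reference, the arithmetic all of the calc_pi_* variants implement: fill a buffer with the integrand of the arctangent integral, then reduce it. In LaTeX, mirroring the code above,

\pi = \int_0^1 \frac{4}{1+x^2}\,dx \approx \frac{1}{N}\sum_{i=0}^{N-1} \frac{4}{1+x_i^2}, \qquad x_i = \frac{i - 0.5}{N},

where N is num_steps; the parallel_for writes the summands and the later kernels (or library reductions) add them up.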
@@ -83,15 +82,18 @@ float calc_pi_dpstd_native(size_t num_steps, Policy&& policy) {
   // Single task is needed here to make sure
   // data is not written over.
   policy.queue().submit([&](handler& h) {
-    auto a = buf.get_access<access::mode::read_write>(h);
+    accessor a(buf, h);
     h.single_task([=]() {
       for (int i = 1; i < num_steps; i++) a[0] += a[i];
     });
   });
   policy.queue().wait();

-  float mynewresult =
-      buf.get_access<access::mode::read>()[0] / (float)num_steps;
+
+  // float mynewresult = buf.get_access<access::mode::read>()[0] / (float)num_steps;
+  host_accessor answer(buf, read_only);
+  float mynewresult = answer[0] / (float)num_steps;
+
   return mynewresult;
 }

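The host-side read above follows the same modernization: the zero-argument buf.get_access<access::mode::read>() becomes a host_accessor, whose constructor blocks until outstanding device work on the buffer finishes. A minimal sketch of just that pattern (read_first is a hypothetical helper, not part of the sample):

// Hypothetical helper showing the host_accessor read used above.
float read_first(buffer<float, 1>& buf, size_t num_steps) {
  host_accessor answer(buf, read_only);  // waits for the device, then maps the data
  return answer[0] / (float)num_steps;
}
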
@@ -109,7 +111,7 @@ float calc_pi_dpstd_native2(size_t num_steps, Policy&& policy, int group_size) {

   // fill buffer with calculations
   policy.queue().submit([&](handler& h) {
-    auto writeresult = buf.get_access<access::mode::write>(h);
+    accessor writeresult(buf, h, write_only);
     h.parallel_for(range<1>{num_steps}, [=](id<1> idx) {
       float x = ((float)idx[0] - 0.5) / (float)num_steps;
       writeresult[idx[0]] = 4.0f / (1.0 + x * x);
@@ -126,8 +128,8 @@ float calc_pi_dpstd_native2(size_t num_steps, Policy&& policy, int group_size) {
   buffer<float, 1> bufc{c, range<1>{num_groups}};
   for (int j = 0; j < num_groups; j++) {
     policy.queue().submit([&](handler& h) {
-      auto my_a = buf.get_access<access::mode::read>(h);
-      auto my_c = bufc.get_access<access::mode::write>(h);
+      accessor my_a(buf, h, read_only);
+      accessor my_c(bufc, h, write_only);
       h.single_task([=]() {
         for (int i = 0 + group_size * j; i < group_size + group_size * j; i++)
           my_c[j] += my_a[i];
@@ -136,7 +138,7 @@ float calc_pi_dpstd_native2(size_t num_steps, Policy&& policy, int group_size) {
   }
   policy.queue().wait();

-  auto src = bufc.get_access<access::mode::read>();
+  host_accessor src(bufc, read_only);

   // Sum up results on CPU
   float mynewresult = 0.0;
@@ -299,7 +301,8 @@ float calc_pi_dpstd_native3(size_t num_steps, int groups, Policy&& policy) {

   // fill the buffer with the calculation using parallel for
   policy.queue().submit([&](handler& h) {
-    auto writeresult = buf.get_access<access::mode::write>(h);
+    accessor writeresult(buf, h, write_only);
+
     h.parallel_for(range<1>{num_steps}, [=](id<1> idx) {
       float x = (float)idx[0] / (float)num_steps;
       writeresult[idx[0]] = 4.0f / (1.0f + x * x);
@@ -343,9 +346,8 @@ float calc_pi_dpstd_native3(size_t num_steps, int groups, Policy&& policy) {
   auto local_reduce_event =
       policy.queue().submit([&buf, &temp_buf, &brick_reduce, &tf_init,
                              num_steps, n_groups, workgroup_size](handler& h) {
-        auto access_buf = buf.template get_access<access::mode::read_write>(h);
-        auto temp_acc =
-            temp_buf.template get_access<access::mode::discard_write>(h);
+        accessor access_buf(buf, h);
+        accessor temp_acc(temp_buf, h, write_only);
         // Create temporary local buffer
         accessor<float, 1, access::mode::read_write, access::target::local>
             temp_buf_local(range<1>(workgroup_size), h);
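
One nuance worth flagging (an editorial note, not something this commit changes): the deleted access::mode::discard_write told the runtime it could skip copying temp_buf's previous contents to the device, whereas plain write_only carries no such hint. If that optimization matters, the SYCL 2020 spelling that preserves discard semantics adds the no_init property tag:

// Sketch only: SYCL 2020 equivalent of the old discard_write accessor.
accessor temp_acc(temp_buf, h, write_only, no_init);
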
@@ -373,8 +375,7 @@ float calc_pi_dpstd_native3(size_t num_steps, int groups, Policy&& policy) {
   reduce_event = policy.queue().submit([&reduce_event, &temp_buf, &combine,
                                         countby2, n_groups](handler& h) {
     h.depends_on(reduce_event);
-    auto temp_acc =
-        temp_buf.template get_access<access::mode::read_write>(h);
+    accessor temp_acc(temp_buf, h);
     h.parallel_for(range<1>(n_groups), [=](item<1> item_id) mutable {
       auto global_idx = item_id.get_linear_id();

@@ -388,10 +389,9 @@ float calc_pi_dpstd_native3(size_t num_steps, int groups, Policy&& policy) {
       countby2 *= 2;
     } while (countby2 < n_groups);
   }
-
-  float answer = temp_buf.template get_access<access::mode::read>()[0];
-  result = answer / (float)num_steps;
-  return result;
+
+  host_accessor answer(temp_buf, read_only);
+  return answer[0] / (float)num_steps;
 }

 // dpstd_native4 fills a buffer with number 1...num_steps and then
@@ -406,7 +406,7 @@ float calc_pi_dpstd_native4(size_t num_steps, int groups, Policy&& policy) {

   // fill buffer with 1...num_steps
   policy.queue().submit([&](handler& h) {
-    auto writeresult = buf2.get_access<access::mode::write>(h);
+    accessor writeresult(buf2, h);
     h.parallel_for(range<1>{num_steps},
                    [=](id<1> idx) { writeresult[idx[0]] = (float)idx[0]; });
   });
@@ -453,9 +453,8 @@ float calc_pi_dpstd_native4(size_t num_steps, int groups, Policy&& policy) {
   policy.queue().submit([&buf2, &temp_buf, &brick_reduce, &tf_init,
                          num_steps, n_groups, workgroup_size](handler& h) {
     // grab access to the previous input
-    auto access_buf = buf2.template get_access<access::mode::read_write>(h);
-    auto temp_acc =
-        temp_buf.template get_access<access::mode::discard_write>(h);
+    accessor access_buf(buf2, h);
+    accessor temp_acc(temp_buf, h, write_only);
     // Create temporary local buffer
     accessor<float, 1, access::mode::read_write, access::target::local>
         temp_buf_local(range<1>(workgroup_size), h);
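
Also untouched by this commit, but in the same spirit: the verbose local accessor declared above has a shorter SYCL 2020 spelling. A sketch assuming the same names and a compiler that implements local_accessor:

// Sketch only: SYCL 2020 local_accessor form of the declaration above.
local_accessor<float, 1> temp_buf_local(range<1>(workgroup_size), h);
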
@@ -484,8 +483,7 @@ float calc_pi_dpstd_native4(size_t num_steps, int groups, Policy&& policy) {
   reduce_event = policy.queue().submit([&reduce_event, &temp_buf, &combine,
                                         countby2, n_groups](handler& h) {
     h.depends_on(reduce_event);
-    auto temp_acc =
-        temp_buf.template get_access<access::mode::read_write>(h);
+    accessor temp_acc(temp_buf, h);
     h.parallel_for(range<1>(n_groups), [=](item<1> item_id) mutable {
       auto global_idx = item_id.get_linear_id();

@@ -499,10 +497,8 @@ float calc_pi_dpstd_native4(size_t num_steps, int groups, Policy&& policy) {
       countby2 *= 2;
     } while (countby2 < n_groups);
   }
-  float answer = temp_buf.template get_access<access::mode::read_write>()[0];
-  result = answer / (float)num_steps;
-
-  return result;
+  host_accessor answer(temp_buf, read_only);
+  return answer[0] / (float)num_steps;
 }

 // This function shows the use of two different DPC++ library calls.
@@ -604,7 +600,7 @@ void mpi_native(float* results, int rank_num, int num_procs,
   // constructed at runtime.
   q.submit([&](handler& h) {
     // Accessors are used to get access to the memory owned by the buffers.
-    auto results_accessor = results_buf.get_access<access::mode::write>(h);
+    accessor results_accessor(results_buf, h, write_only);
     // Each kernel calculates a partial of the number Pi in parallel.
     h.parallel_for(num_items, [=](id<1> k) {
       float x = ((float)rank_num / (float)num_procs) + (float)k * dx + dx2;
@@ -786,7 +782,7 @@ int main(int argc, char** argv) {
     std::cout << "mpi transform_reduce:\t";
     std::cout << std::setprecision(3) << "PI =" << pi;
     std::cout << " in " << stop7 << " seconds\n";
-    std::cout << "succes\n";
+    std::cout << "success\n";
   }

   MPI_Finalize();

0 commit comments