Begin moving ordered_queue to be a property on the queue, so that ordered_queue can be deprecated in the future.

jbrodman · jbrodman · commit 8ba1d4e90228 · 2020-01-24T10:25:29.000-05:00
Signed-off-by: James Brodman <james.brodman@intel.com>
diff --git a/sycl/include/CL/sycl/property_list.hpp b/sycl/include/CL/sycl/property_list.hpp
@@ -39,6 +39,8 @@ class context_bound;
 
 namespace queue {
 class enable_profiling;
+class in_order;
+class out_of_order;
 } // namespace queue
 
 namespace detail {
@@ -57,6 +59,8 @@ enum PropKind {
 
   // Queue properties
   QueueEnableProfiling,
+  InOrder,
+  OutOfOrder,
 
   PropKindSize
 };
@@ -110,6 +114,8 @@ RegisterProp(PropKind::BufferContextBound, buffer::context_bound);
 
 // Queue
 RegisterProp(PropKind::QueueEnableProfiling, queue::enable_profiling);
+RegisterProp(PropKind::InOrder, queue::in_order);
+RegisterProp(PropKind::OutOfOrder, queue::out_of_order);
 
 // Sentinel, needed for automatic build of tuple in property_list.
 RegisterProp(PropKind::PropKindSize, PropBase);
@@ -172,6 +178,10 @@ class context_bound
 namespace queue {
 class enable_profiling
     : public detail::Prop<detail::PropKind::QueueEnableProfiling> {};
+// Queue property; detail::getQueueOrder maps it to QueueOrder::Ordered.
+class in_order : public detail::Prop<detail::PropKind::InOrder> {};
+// Queue property; getQueueOrder yields QueueOrder::OOO if in_order is absent.
+class out_of_order : public detail::Prop<detail::PropKind::OutOfOrder> {};
 } // namespace queue
 
 } // namespace property
diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp
@@ -13,6 +13,19 @@
 
 __SYCL_INLINE namespace cl {
 namespace sycl {
+
+namespace detail {
+
+// Maps queue properties to an execution order: the in_order property selects
+// QueueOrder::Ordered; every other property list yields QueueOrder::OOO.
+// The out_of_order property is not consulted here.
+QueueOrder getQueueOrder(const property_list &propList) {
+  const bool IsInOrder = propList.has_property<property::queue::in_order>();
+  return IsInOrder ? QueueOrder::Ordered : QueueOrder::OOO;
+}
+
+} // namespace detail
+
 queue::queue(const context &syclContext, const device_selector &deviceSelector,
              const async_handler &asyncHandler, const property_list &propList) {
 
@@ -23,16 +36,17 @@ queue::queue(const context &syclContext, const device_selector &deviceSelector,
   };
 
   const device &syclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp);
+
   impl = std::make_shared<detail::queue_impl>(
       detail::getSyclObjImpl(syclDevice), detail::getSyclObjImpl(syclContext),
-      asyncHandler, cl::sycl::detail::QueueOrder::OOO, propList);
+      asyncHandler, detail::getQueueOrder(propList), propList);
 }
 
 queue::queue(const device &syclDevice, const async_handler &asyncHandler,
              const property_list &propList) {
   impl = std::make_shared<detail::queue_impl>(
       detail::getSyclObjImpl(syclDevice), asyncHandler,
-      cl::sycl::detail::QueueOrder::OOO, propList);
+      detail::getQueueOrder(propList), propList);
 }
 
 queue::queue(cl_command_queue clQueue, const context &syclContext,
diff --git a/sycl/test/ordered_queue/ordered_prop_dmemll.cpp b/sycl/test/ordered_queue/ordered_prop_dmemll.cpp
@@ -0,0 +1,116 @@
+// RUN: %clangxx -fsycl %s -o %t1.out -lOpenCL
+// RUN: %CPU_RUN_PLACEHOLDER %t1.out
+// RUN: %GPU_RUN_PLACEHOLDER %t1.out
+
+//==-------- ordered_prop_dmemll.cpp - Device Memory Linked List test ------==//
+// It uses a queue created with the in_order property, so explicit waiting is
+// not necessary between kernels
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <CL/sycl.hpp>
+
+using namespace cl::sycl;
+
+constexpr int numNodes = 4;
+
+// True iff cq is in-order; a failed query yields false even under NDEBUG.
+bool getQueueOrder(cl_command_queue cq) {
+  cl_command_queue_properties props = 0;
+  cl_int rc = clGetCommandQueueInfo(cq, CL_QUEUE_PROPERTIES, sizeof(props),
+                                    &props, nullptr);
+  assert(CL_SUCCESS == rc && "Failed to obtain queue info from ocl device");
+  return CL_SUCCESS == rc && !(props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
+}
+
+// One list element; a fresh node has no successor and a 0xDEADBEEF payload.
+struct Node {
+  Node() = default;
+  Node *pNext = nullptr;
+  uint32_t Num = 0xDEADBEEF;
+};
+
+class foo;
+// Builds a linked list in device memory, updates it with two kernels that are
+// submitted back to back with no wait in between, and verifies the result.
+int main() {
+  queue q{property::queue::in_order()};
+  auto dev = q.get_device();
+  auto ctxt = q.get_context();
+  Node h_cur;
+
+  Node *d_head = (Node *)malloc_device(sizeof(Node), dev, ctxt);
+  if (d_head == nullptr)
+    return -1;
+  Node *d_cur = d_head;
+
+  for (int i = 0; i < numNodes; i++) {
+    h_cur.Num = i * 2;
+    if (i != (numNodes - 1)) {
+      h_cur.pNext = (Node *)malloc_device(sizeof(Node), dev, ctxt);
+      if (h_cur.pNext == nullptr)
+        return -1;
+    } else {
+      h_cur.pNext = nullptr;
+    }
+    // h_cur is reused by the next iteration, so finish this copy first.
+    event e0 = q.memcpy(d_cur, &h_cur, sizeof(Node));
+    e0.wait();
+    d_cur = h_cur.pNext;
+  }
+
+  // First kernel: Num = Num * 2 + 1 on every node.
+  q.submit([=](handler &cgh) {
+    cgh.single_task<class foo>([=]() {
+      Node *pHead = d_head;
+      while (pHead) {
+        pHead->Num = pHead->Num * 2 + 1;
+        pHead = pHead->pNext;
+      }
+    });
+  });
+
+  // Second kernel: Num = Num + 42, applied to the first kernel's output.
+  q.submit([=](handler &cgh) {
+    cgh.single_task<class bar>([=]() {
+      Node *pHead = d_head;
+      while (pHead) {
+        pHead->Num = pHead->Num + 42;
+        pHead = pHead->pNext;
+      }
+    });
+  });
+
+  d_cur = d_head;
+  for (int i = 0; i < numNodes; i++) {
+    event c = q.memcpy(&h_cur, d_cur, sizeof(Node));
+    c.wait();
+    free(d_cur, ctxt);
+    // Node i starts as 2 * i, so both kernels leave (2 * i) * 2 + 1 + 42.
+    const uint32_t want = static_cast<uint32_t>(i * 4 + 43);
+    if (h_cur.Num != want) {
+      std::cout << "Result mismatches " << h_cur.Num << " vs expected " << want
+                << " for index " << i << std::endl;
+      return -1;
+    }
+    d_cur = h_cur.pNext;
+  }
+
+  // q.get() is only valid on a non-host queue, so skip the OpenCL query on host.
+  if (!dev.is_host()) {
+    cl_command_queue cq = q.get();
+    const bool isInOrder = getQueueOrder(cq);
+    clReleaseCommandQueue(cq); // q.get() hands back a retained handle.
+    if (!isInOrder) {
+      std::cout << "Resulting queue order is OOO but expected order is inorder"
+                << std::endl;
+      return -1;
+    }
+  }
+
+  return 0;
+}