Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Add a test for SYCL_INTEL_local_memory extension #176

Merged
merged 4 commits into from
Apr 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ SYCL/Functor @AlexeySachkov
SYCL/GroupAlgorithm @Pennycook @AlexeySachkov
SYCL/SubGroup @Pennycook @AlexeySachkov

# Group local memory
SYCL/GroupLocalMemory @sergey-semenov

# Hierarchical parallelism
SYCL/HierPar @kbobrovs

Expand Down
95 changes: 95 additions & 0 deletions SYCL/GroupLocalMemory/group_local_memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A new directory is added. Could you please set the correct code owner (in .github/CODEOWNERS)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added myself as the code owner. @Pennycook Do you mind being set as the second code owner for the group local memory tests? (I think this can be done as a separate PR, so that it does not block merging this one.)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Pennycook Do you mind being set as the second code owner for the group local memory tests?

Fine by me.

// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include <CL/sycl.hpp>

#include <cassert>
#include <vector>

// Number of work-items in each work-group.
constexpr size_t WgSize{32};
// Number of work-groups launched by each kernel.
constexpr size_t WgCount{4};
// Total number of work-items, and the element count of the result buffers.
constexpr size_t Size{WgCount * WgSize};

// Non-default-constructible payload that KernelA places in group-local memory.
// The constructor fills every element of Values with Value and increments
// Counter, so the host can verify how many times construction happened.
struct Foo {
  Foo() = delete;
  Foo(int Value, int &Counter) {
    // WgSize is a size_t; use a size_t index to avoid a signed/unsigned
    // comparison in the loop condition.
    for (size_t I = 0; I < WgSize; ++I)
      Values[I] = Value;
    ++Counter;
  }
  int Values[WgSize];
};

// Kernel name types for the two parallel_for submissions in main().
class KernelA;
class KernelB;

using namespace sycl;

int main() {
queue Q;
{
std::vector<int> Vec(Size, 0);
buffer<int, 1> Buf{Vec.data(), range<1>(Size)};
std::vector<int> CounterVec(WgCount, 0);
buffer<int, 1> CounterBuf{CounterVec.data(), range<1>(WgCount)};

Q.submit([&](handler &Cgh) {
auto Acc = Buf.get_access<access::mode::read_write>(Cgh);
auto CounterAcc = CounterBuf.get_access<access::mode::read_write>(Cgh);
Cgh.parallel_for<KernelA>(
nd_range<1>(range<1>(Size), range<1>(WgSize)), [=](nd_item<1> Item) {
// Some alternative (and functionally equivalent) ways to use this
// would be:
// auto Ptr = group_local_memory<Foo>(Item.get_group(), ...);
// Foo &Ref = *group_local_memory<Foo>(Item.get_group(), ...);
multi_ptr<Foo, access::address_space::local_space> Ptr =
group_local_memory<Foo>(Item.get_group(), 1,
CounterAcc[Item.get_group_linear_id()]);
Ptr->Values[Item.get_local_linear_id()] *=
Item.get_local_linear_id();

Item.barrier();
// Check that the memory is accessible from other work-items
size_t LocalIdx = Item.get_local_linear_id() ^ 1;
size_t GlobalIdx = Item.get_global_linear_id() ^ 1;
Acc[GlobalIdx] = Ptr->Values[LocalIdx];
});
});

auto Acc = Buf.get_access<access::mode::read>();
for (size_t I = 0; I < Size; ++I)
assert(Acc[I] == I % WgSize);

// Check that the constructor has been called once per work-group
auto CounterAcc = CounterBuf.get_access<access::mode::read>();
for (size_t I = 0; I < WgCount; ++I)
assert(CounterAcc[I] == 1);
}

{
std::vector<int> Vec(Size, 0);
buffer<int, 1> Buf{Vec.data(), range<1>(Size)};

Q.submit([&](handler &Cgh) {
auto Acc = Buf.get_access<access::mode::read_write>(Cgh);
Cgh.parallel_for<KernelB>(
nd_range<1>(range<1>(Size), range<1>(WgSize)), [=](nd_item<1> Item) {
multi_ptr<int[WgSize], access::address_space::local_space> Ptr =
group_local_memory_for_overwrite<int[WgSize]>(Item.get_group());
(*Ptr)[Item.get_local_linear_id()] = Item.get_local_linear_id();

Item.barrier();
// Check that the memory is accessible from other work-items
size_t LocalIdx = Item.get_local_linear_id() ^ 1;
size_t GlobalIdx = Item.get_global_linear_id() ^ 1;
Acc[GlobalIdx] = (*Ptr)[LocalIdx];
});
});

auto Acc = Buf.get_access<access::mode::read>();
for (size_t I = 0; I < Size; ++I)
assert(Acc[I] == I % WgSize);
}
}