Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Test bfloat16 fmax fmin #1444

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions SYCL/BFloat16/bfloat16_fmax_fmin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// UNSUPPORTED: hip
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
// Currently the feature is supported only on CPU and GPU, natively or by
// software emulation.
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

//==------- bfloat16_fmax_fmin.cpp - SYCL bfloat16 test for fmax, fmin------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//

#include "bfloat16_test_utils.hpp"

int main() {
sycl::queue DeviceQueue(sycl::default_selector_v);
std::cout << "Running on "
<< DeviceQueue.get_device().get_info<sycl::info::device::name>()
<< "\n";
{
std::initializer_list<Bfloat16StorageT> InputVec1 = {
0x0, 0x8000, 0x1, 0x7FC0, 0x7FC4, 0x7F80, 0xFF80, 0xFE84};
std::initializer_list<Bfloat16StorageT> InputVec2 = {
0x0, 0x0, 0x2, 0xAAAA, 0x7FCD, 0x7F7F, 0xFF7F, 0xAE44};
std::initializer_list<Bfloat16StorageT> MaxResVec = {
0x0, 0x0, 0x2, 0xAAAA, 0x7FC0, 0x7F80, 0xFF7F, 0xAE44};
std::initializer_list<Bfloat16StorageT> MinResVec = {
0x0, 0x8000, 0x1, 0xAAAA, 0x7FC0, 0x7F7F, 0xFF80, 0xFE84};

test_host2(InputVec1, InputVec2, MaxResVec,
BF16_FUNC2(sycl::ext::oneapi::experimental::fmax));
test_device2(DeviceQueue, InputVec1, InputVec2, MaxResVec,
BF16_FUNC2(sycl::ext::oneapi::experimental::fmax));

test_host2_marray<1>(InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_host2_marray<2>(InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_host2_marray<3>(InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_host2_marray<4>(InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_device2_marray<1>(DeviceQueue, InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_device2_marray<2>(DeviceQueue, InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_device2_marray<3>(DeviceQueue, InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));
test_device2_marray<4>(DeviceQueue, InputVec1, InputVec2, MaxResVec,
FUNC2(sycl::ext::oneapi::experimental::fmax));

test_host2(InputVec1, InputVec2, MinResVec,
BF16_FUNC2(sycl::ext::oneapi::experimental::fmin));
test_device2(DeviceQueue, InputVec1, InputVec2, MinResVec,
BF16_FUNC2(sycl::ext::oneapi::experimental::fmin));

test_host2_marray<1>(InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_host2_marray<2>(InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_host2_marray<3>(InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_host2_marray<4>(InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_device2_marray<1>(DeviceQueue, InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_device2_marray<2>(DeviceQueue, InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_device2_marray<3>(DeviceQueue, InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
test_device2_marray<4>(DeviceQueue, InputVec1, InputVec2, MinResVec,
FUNC2(sycl::ext::oneapi::experimental::fmin));
}
}
165 changes: 165 additions & 0 deletions SYCL/BFloat16/bfloat16_test_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iostream>
#include <sycl/sycl.hpp>
#include <type_traits>

using Bfloat16StorageT = sycl::ext::oneapi::detail::Bfloat16StorageT;

template <class InputTy, class OutputTy, class FuncTy>
void test_host1(std::initializer_list<InputTy> Input,
std::initializer_list<OutputTy> RefOutput, FuncTy Func,
int Line = __builtin_LINE()) {
auto Size = Input.size();
assert(RefOutput.size() == Size);

for (int i = 0; i < Size; ++i) {
auto Expected = *(std::begin(RefOutput) + i);
auto Res = Func(*(std::begin(Input) + i));
if (Expected == Res)
continue;

std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res
<< " != " << Expected << std::endl;
assert(false);
}
}

template <class InputTy, class OutputTy, class FuncTy>
void test_host2(std::initializer_list<InputTy> Input1,
std::initializer_list<InputTy> Input2,
std::initializer_list<OutputTy> RefOutput, FuncTy Func,
int Line = __builtin_LINE()) {
auto Size = Input1.size();
assert((RefOutput.size() == Size) && (Input2.size() == Size));

for (int i = 0; i < Size; ++i) {
auto Expected = *(std::begin(RefOutput) + i);
auto Res = Func(*(std::begin(Input1) + i), *(std::begin(Input2) + i));
if (Expected == Res)
continue;

std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res
<< " != " << Expected << std::endl;
assert(false);
}
}

template <size_t N, class InputTy, class OutputTy, class FuncTy>
void test_host2_marray(std::initializer_list<InputTy> Input1,
std::initializer_list<InputTy> Input2,
std::initializer_list<OutputTy> RefOutput, FuncTy Func,
int Line = __builtin_LINE()) {
auto Size = Input1.size();
assert((RefOutput.size() == Size) && (Input2.size() == Size));
sycl::marray<sycl::ext::oneapi::bfloat16, N> MAInput1;
sycl::marray<sycl::ext::oneapi::bfloat16, N> MAInput2;
for (size_t i = 0; i < N; i++) {
MAInput1[i] =
sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input1) + i));
MAInput2[i] =
sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input2) + i));
}

sycl::marray<sycl::ext::oneapi::bfloat16, N> res = Func(MAInput1, MAInput2);
for (int i = 0; i < N; ++i) {
auto Expected = *(std::begin(RefOutput) + i);
auto Res = sycl::ext::oneapi::detail::bfloat16ToBits(res[i]);
if (Expected == Res)
continue;

std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res
<< " != " << Expected << std::endl;
assert(false);
}
}

template <class InputTy, class OutputTy, class FuncTy>
void test_device2(sycl::queue &Q, std::initializer_list<InputTy> Input1,
std::initializer_list<InputTy> Input2,
std::initializer_list<OutputTy> RefOutput, FuncTy Func,
int Line = __builtin_LINE()) {
auto Size = Input1.size();
assert(Size == Input2.size());
assert(RefOutput.size() == Size);

sycl::buffer<InputTy> InBuf1(Size);
sycl::buffer<InputTy> InBuf2(Size);
{
sycl::host_accessor InAcc1(InBuf1, sycl::write_only);
sycl::host_accessor InAcc2(InBuf2, sycl::write_only);
for (int i = 0; i < Size; ++i) {
InAcc1[i] = *(std::begin(Input1) + i);
InAcc2[i] = *(std::begin(Input2) + i);
}
}

sycl::buffer<OutputTy> OutBuf(Size);
Q.submit([&](sycl::handler &CGH) {
sycl::accessor InAcc1(InBuf1, CGH, sycl::read_only);
sycl::accessor InAcc2(InBuf2, CGH, sycl::read_only);
sycl::accessor OutAcc(OutBuf, CGH, sycl::write_only);
CGH.parallel_for(Size, [=](sycl::id<1> Id) {
OutAcc[Id] = Func(InAcc1[Id], InAcc2[Id]);
});
}).wait();

sycl::host_accessor Acc(OutBuf, sycl::read_only);
for (int i = 0; i < Size; ++i) {
auto Expected = *(std::begin(RefOutput) + i);
if (Expected == Acc[i])
continue;
std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Acc[i]
<< " != " << Expected << ", input idx was " << i << std::endl;
assert(false);
}
}

template <size_t N, class InputTy, class OutputTy, class FuncTy>
void test_device2_marray(sycl::queue &Q, std::initializer_list<InputTy> Input1,
std::initializer_list<InputTy> Input2,
std::initializer_list<OutputTy> RefOutput, FuncTy Func,
int Line = __builtin_LINE()) {
auto Size = Input1.size();
assert(Size == Input2.size());
assert(RefOutput.size() == Size);

sycl::marray<sycl::ext::oneapi::bfloat16, N> MAInput1;
sycl::marray<sycl::ext::oneapi::bfloat16, N> MAInput2;
for (size_t i = 0; i < N; i++) {
MAInput1[i] =
sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input1) + i));
MAInput2[i] =
sycl::ext::oneapi::detail::bitsToBfloat16(*(std::begin(Input2) + i));
}

sycl::marray<sycl::ext::oneapi::bfloat16, N> res;
{
sycl::buffer<sycl::marray<sycl::ext::oneapi::bfloat16, N>, 1> OutBuf(&res,
1);
Q.submit([&](sycl::handler &CGH) {
sycl::accessor OutAcc(OutBuf, CGH, sycl::write_only);
CGH.single_task([=]() { OutAcc[0] = Func(MAInput1, MAInput2); });
}).wait();
}

for (int i = 0; i < N; ++i) {
auto Expected = *(std::begin(RefOutput) + i);
auto Res = sycl::ext::oneapi::detail::bfloat16ToBits(res[i]);
if (Expected == Res)
continue;
std::cout << "Mismatch at line " << Line << "[" << i << "]: " << Res
<< " != " << Expected << ", input idx was " << i << std::endl;
assert(false);
}
}

#define FUNC2(Name) [](auto x, auto y) { return (Name)(x, y); }
#define BF16_FUNC2(Name) \
[](Bfloat16StorageT x, Bfloat16StorageT y) { \
return sycl::ext::oneapi::detail::bfloat16ToBits( \
(Name)(sycl::ext::oneapi::detail::bitsToBfloat16(x), \
sycl::ext::oneapi::detail::bitsToBfloat16(y))); \
}