Skip to content

[SYCL][ESIMD] Add support for addc and subb operations #8758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 43 additions & 4 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/ItaniumDemangle.h"
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
Expand All @@ -36,6 +35,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"

#include <cctype>
#include <cstring>
Expand Down Expand Up @@ -672,12 +672,21 @@ class ESIMDIntrinDescTable {
{"slm_init", {"slm.init", {a(0)}}},
{"bf_cvt", {"bf.cvt", {a(0)}}},
{"tf32_cvt", {"tf32.cvt", {a(0)}}},
{"addc", {"addc", {l(0)}}},
{"subb", {"subb", {l(0)}}},
{"bfn", {"bfn", {a(0), a(1), a(2), t(0)}}}};
}

const IntrinTable &getTable() { return Table; }
};

static bool isStructureReturningFunction(StringRef FunctionName) {
return llvm::StringSwitch<bool>(FunctionName)
.Case("addc", true)
.Case("subb", true)
.Default(false);
}

// The C++11 "magic static" idiom to lazily initialize the ESIMD intrinsic table
static const IntrinTable &getIntrinTable() {
static ESIMDIntrinDescTable TheTable;
Expand Down Expand Up @@ -1418,6 +1427,8 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
SmallVector<Value *, 16> GenXArgs;
createESIMDIntrinsicArgs(Desc, GenXArgs, CI, FE);
Function *NewFDecl = nullptr;
bool DoesFunctionReturnStructure =
isStructureReturningFunction(Desc.GenXSpelling);
if (Desc.GenXSpelling.rfind("test.src.", 0) == 0) {
// Special case for testing purposes
NewFDecl = createTestESIMDDeclaration(Desc, GenXArgs, CI);
Expand All @@ -1426,8 +1437,17 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
GenXIntrinsic::getGenXIntrinsicPrefix() + Desc.GenXSpelling + Suffix);

SmallVector<Type *, 16> GenXOverloadedTypes;
if (GenXIntrinsic::isOverloadedRet(ID))
GenXOverloadedTypes.push_back(CI.getType());
if (GenXIntrinsic::isOverloadedRet(ID)) {
if (DoesFunctionReturnStructure) {
// TODO implement more generic handling of returned structure
// current code assumes that returned code has 2 members of the
// same type as arguments.
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
} else {
GenXOverloadedTypes.push_back(CI.getType());
}
}
for (unsigned i = 0; i < GenXArgs.size(); ++i)
if (GenXIntrinsic::isOverloadedArg(ID, i))
GenXOverloadedTypes.push_back(GenXArgs[i]->getType());
Expand All @@ -1441,15 +1461,34 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
NewFDecl->getFnAttribute(llvm::Attribute::ReadNone).isValid();
if (FixReadNone)
NewFDecl->removeFnAttr(llvm::Attribute::ReadNone);
Instruction *NewInst = nullptr;
AddrSpaceCastInst *CastInstruction = nullptr;
if (DoesFunctionReturnStructure) {
llvm::esimd::assert_and_diag(
isa<AddrSpaceCastInst>(GenXArgs[0]),
"Unexpected instruction for returning a structure from a function.");
CastInstruction = static_cast<AddrSpaceCastInst *>(GenXArgs[0]);
// Remove 1st argument that is used to return the structure
GenXArgs.erase(GenXArgs.begin());
}

CallInst *NewCI = IntrinsicInst::Create(
NewFDecl, GenXArgs,
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
&CI);
if (FixReadNone)
NewCI->setMemoryEffects(MemoryEffects::none());
NewCI->setDebugLoc(CI.getDebugLoc());
if (DoesFunctionReturnStructure) {
IRBuilder<> Builder(&CI);

NewInst = Builder.CreateStore(
NewCI, Builder.CreateBitCast(CastInstruction->getPointerOperand(),
NewCI->getType()->getPointerTo()));
} else {
NewInst = addCastInstIfNeeded(&CI, NewCI);
}

Instruction *NewInst = addCastInstIfNeeded(&CI, NewCI);
CI.replaceAllUsesWith(NewInst);
CI.eraseFromParent();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,47 @@ __esimd_dpasw_nosrc0(__ESIMD_DNS::vector_type_t<T1, N1> src1,
}
#endif // !__SYCL_DEVICE_ONLY__

template <typename T, int N>
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
__ESIMD_DNS::vector_type_t<T, N>>
__esimd_addc(__ESIMD_DNS::vector_type_t<T, N> src0,
__ESIMD_DNS::vector_type_t<T, N> src1)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // !__SYCL_DEVICE_ONLY__
{
__ESIMD_NS::simd<uint64_t, N> Result64 = __ESIMD_NS::simd<T, N>(src0);
Result64 += __ESIMD_NS::simd<T, N>(src1);
auto Result32 = Result64.template bit_cast_view<T>();
__ESIMD_NS::simd<uint32_t, N> CarryV = Result32.template select<N, 2>(1);
__ESIMD_NS::simd<uint32_t, N> ResV = Result32.template select<N, 2>(0);
std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>>
ReturnValue = std::make_pair(CarryV.data(), ResV.data());
return ReturnValue;
}
#endif // !__SYCL_DEVICE_ONLY__

template <typename T, int N>
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
__ESIMD_DNS::vector_type_t<T, N>>
__esimd_subb(__ESIMD_DNS::vector_type_t<T, N> src0,
__ESIMD_DNS::vector_type_t<T, N> src1)
#ifdef __SYCL_DEVICE_ONLY__
;
#else // !__SYCL_DEVICE_ONLY__
{
__ESIMD_NS::simd<uint64_t, N> Result64 = __ESIMD_NS::simd<T, N>(src0);
Result64 -= __ESIMD_NS::simd<T, N>(src1);
auto Result32 = Result64.template bit_cast_view<T>();
__ESIMD_NS::simd<uint32_t, N> BorrowV =
__ESIMD_NS::simd<T, N>(src0) < __ESIMD_NS::simd<T, N>(src1);
__ESIMD_NS::simd<uint32_t, N> ResV = Result32.template select<N, 2>(0);
std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>>
ReturnValue = std::make_pair(BorrowV.data(), ResV.data());
return ReturnValue;
}
#endif // !__SYCL_DEVICE_ONLY__

template <uint8_t FuncControl, typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__esimd_bfn(__ESIMD_raw_vec_t(T, N) src0, __ESIMD_raw_vec_t(T, N) src1,
Expand Down
74 changes: 74 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,80 @@ imul(T &rmd, T0 src0, T1 src1) {
return Res[0];
}

template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
__ESIMD_DNS::vector_type_t<uint32_t, N>>
Result = __esimd_addc<uint32_t, N>(src0.data(), src1.data());

carry = Result.first;
return Result.second;
}

template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
uint32_t src1) {
__ESIMD_NS::simd<uint32_t, N> Src1V = src1;
return addc(carry, src0, Src1V);
}

template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
__ESIMD_NS::simd<uint32_t, N> Src0V = src0;
return addc(carry, Src0V, src1);
}

__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
__ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
__ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
__ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
__ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
carry = CarryV[0];
return Res[0];
}

template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
__ESIMD_DNS::vector_type_t<uint32_t, N>>
Result = __esimd_subb<uint32_t, N>(src0.data(), src1.data());

borrow = Result.first;
return Result.second;
}

template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
uint32_t src1) {
__ESIMD_NS::simd<uint32_t, N> Src1V = src1;
return subb(borrow, src0, Src1V);
}

template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
__ESIMD_NS::simd<uint32_t, N> Src0V = src0;
return subb(borrow, Src0V, src1);
}

__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
__ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
__ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
__ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
__ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
borrow = BorrowV[0];
return Res[0];
}

/// Integral quotient (vector version)
/// @tparam T element type of the input and return vectors.
/// @tparam SZ size of the input and returned vectors.
Expand Down
163 changes: 163 additions & 0 deletions sycl/test-e2e/ESIMD/addc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
//==---------------- addc.cpp - DPC++ ESIMD on-device test ---------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// The test verifies ESIMD API that adds 2 32-bit integer scalars/vectors with
// carry returning the result as 2 parts: carry flag the input modified operand
// and addition result as return from function.

#include "esimd_test_utils.hpp"

#include <iostream>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::intel::esimd;
namespace iesimd = sycl::ext::intel::experimental::esimd;

template <int N, bool AIsVector, bool BIsVector> bool test(sycl::queue Q) {
static_assert(AIsVector || BIsVector || N == 1,
"(Scalar + Scalar) case must have N==1");

uint32_t ValuesToTryHost[] = {0,
1,
static_cast<uint32_t>(-1),
0x7f,
static_cast<uint32_t>(-0x7f),
0x7fff,
static_cast<uint32_t>(-0x7fff),
0x7ffff,
static_cast<uint32_t>(-0x7ffff),
0x7ffffff,
0x80,
static_cast<uint32_t>(-0x80),
0x8000,
static_cast<uint32_t>(-0x8000),
0x800000,
static_cast<uint32_t>(-0x800000),
0x80000000};
uint32_t ValuesToTrySize = sizeof(ValuesToTryHost) / sizeof(uint32_t);

std::cout << "Running case: N=" << N << ", AIsVector=" << AIsVector
<< ", BIsVector=" << BIsVector << std::endl;

auto ValuesToTryUPtr =
esimd_test::usm_malloc_shared<uint32_t>(Q, ValuesToTrySize);
uint32_t *ValuesToTryPtr = ValuesToTryUPtr.get();
memcpy(ValuesToTryPtr, ValuesToTryHost, ValuesToTrySize * sizeof(uint32_t));

auto ResultsMatrixUPtr = esimd_test::usm_malloc_shared<uint32_t>(
Q, ValuesToTrySize * ValuesToTrySize * N);
auto CarryMatrixUPtr = esimd_test::usm_malloc_shared<uint32_t>(
Q, ValuesToTrySize * ValuesToTrySize * N);
uint32_t *ResultsMatrixPtr = ResultsMatrixUPtr.get();
uint32_t *CarryMatrixPtr = CarryMatrixUPtr.get();

try {
Q.single_task([=]() SYCL_ESIMD_KERNEL {
simd<uint32_t, N> VecInc(0, 1);
for (int AI = 0; AI < ValuesToTrySize; AI++) {
using AType =
std::conditional_t<AIsVector, simd<uint32_t, N>, uint32_t>;
uint32_t AScalar = simd<uint32_t, 1>(
reinterpret_cast<uint32_t *>(ValuesToTryPtr) + AI)[0];
AType A = AScalar;
if constexpr (AIsVector)
A += VecInc;

for (int BI = 0; BI < ValuesToTrySize; BI++) {
using BType =
std::conditional_t<BIsVector, simd<uint32_t, N>, uint32_t>;
uint32_t BScalar = simd<uint32_t, 1>(
reinterpret_cast<uint32_t *>(ValuesToTryPtr) + BI)[0];
BType B = BScalar;
if constexpr (BIsVector)
B += VecInc;

using ResType = std::conditional_t<AIsVector || BIsVector,
simd<uint32_t, N>, uint32_t>;
ResType Carry = 0;
ResType Res = iesimd::addc(Carry, A, B);

if constexpr (AIsVector || BIsVector) {
Carry.copy_to(CarryMatrixPtr + (ValuesToTrySize * AI + BI) * N);
Res.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N);
} else {
simd<uint32_t, 1> Carry1 = Carry;
simd<uint32_t, 1> Res1 = Res;
Carry1.copy_to(CarryMatrixPtr + (ValuesToTrySize * AI + BI) * N);
Res1.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N);
}

} // end for BI
} // end for AI
}).wait();
} catch (sycl::exception const &e) {
std::cout << "SYCL exception caught: " << e.what() << '\n';
return 1;
}

using Result64T = uint64_t;
int NumErrors = 0;
for (int AI = 0; AI < ValuesToTrySize; AI++) {
for (int BI = 0; BI < ValuesToTrySize; BI++) {
for (int I = 0; I < N; I++) {
uint32_t A = ValuesToTryHost[AI];
if constexpr (AIsVector)
A += I;
uint32_t B = ValuesToTryHost[BI];
if constexpr (BIsVector)
B += I;
Result64T R = static_cast<uint32_t>(A);
R += static_cast<uint32_t>(B);

uint32_t ExpectedRes = R & 0xffffffff;
uint32_t ExpectedCarry = (R >> 32) & 0xffffffff;
uint32_t ComputedRes =
ResultsMatrixPtr[(AI * ValuesToTrySize + BI) * N + I];
uint32_t ComputedCarry =
CarryMatrixPtr[(AI * ValuesToTrySize + BI) * N + I];
if (ComputedRes != ExpectedRes || ComputedCarry != ExpectedCarry) {
std::cout << "Error for (" << AI << "," << BI << "): " << A << " + "
<< B << " is Computed(" << ComputedCarry << ","
<< ComputedRes << ") != Expected (" << ExpectedCarry << ","
<< ExpectedRes << "), R = " << R << "\n";
NumErrors++;
}
}
}
}

return NumErrors == 0;
}

int main() {
queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
auto D = Q.get_device();
std::cout << "Running on " << D.get_info<info::device::name>() << "\n";

constexpr bool AIsVector = true;
constexpr bool BIsVector = true;
bool Pass = true;
Pass &= test<16, AIsVector, BIsVector>(Q);
Pass &= test<8, AIsVector, !BIsVector>(Q);
Pass &= test<4, !AIsVector, BIsVector>(Q);

Pass &= test<1, AIsVector, BIsVector>(Q);
Pass &= test<1, AIsVector, !BIsVector>(Q);
Pass &= test<1, !AIsVector, BIsVector>(Q);

Pass &= test<1, !AIsVector, !BIsVector>(Q);

std::cout << (Pass > 0 ? "Passed\n" : "FAILED\n");
return Pass ? 0 : 1;
}
Loading