Skip to content

[SYCL][ESIMD] Add support for addc and subb operations #8758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 54 additions & 14 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,12 +663,21 @@ class ESIMDIntrinDescTable {
{"test.src.tmpl.arg", {t(0), t1(1), t8(2), t16(3), t32(4), c8(17)}}},
{"slm_init", {"slm.init", {a(0)}}},
{"bf_cvt", {"bf.cvt", {a(0)}}},
{"tf32_cvt", {"tf32.cvt", {a(0)}}}};
{"tf32_cvt", {"tf32.cvt", {a(0)}}},
{"addc", {"addc", {l(0)}}},
{"subb", {"subb", {l(0)}}}};
}

// Read-only accessor: returns a const reference to the member `Table`.
const IntrinTable &getTable() { return Table; }
};

// Returns true when FunctionName is exactly "addc" or "subb" -- the only
// intrinsic spellings this helper classifies as structure-returning -- and
// false for every other name.
static bool isStructureReturningFunction(StringRef FunctionName) {
  const bool IsAddc = FunctionName == "addc";
  const bool IsSubb = FunctionName == "subb";
  return IsAddc || IsSubb;
}

// The C++11 "magic static" idiom to lazily initialize the ESIMD intrinsic table
static const IntrinTable &getIntrinTable() {
static ESIMDIntrinDescTable TheTable;
Expand Down Expand Up @@ -1409,6 +1418,8 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
SmallVector<Value *, 16> GenXArgs;
createESIMDIntrinsicArgs(Desc, GenXArgs, CI, FE);
Function *NewFDecl = nullptr;
bool IsStructureReturningFunction =
isStructureReturningFunction(Desc.GenXSpelling);
if (Desc.GenXSpelling.rfind("test.src.", 0) == 0) {
// Special case for testing purposes
NewFDecl = createTestESIMDDeclaration(Desc, GenXArgs, CI);
Expand All @@ -1417,12 +1428,21 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
GenXIntrinsic::getGenXIntrinsicPrefix() + Desc.GenXSpelling + Suffix);

SmallVector<Type *, 16> GenXOverloadedTypes;
if (GenXIntrinsic::isOverloadedRet(ID))
GenXOverloadedTypes.push_back(CI.getType());
if (GenXIntrinsic::isOverloadedRet(ID)) {
if (IsStructureReturningFunction) {
// TODO: implement more generic handling of returned structures. The
// current code assumes that the returned structure has 2 members of the
// same type as the arguments.
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
} else {
GenXOverloadedTypes.push_back(CI.getType());
}
}
for (unsigned i = 0; i < GenXArgs.size(); ++i)
if (GenXIntrinsic::isOverloadedArg(ID, i))
if (GenXIntrinsic::isOverloadedArg(ID, i)) {
GenXOverloadedTypes.push_back(GenXArgs[i]->getType());

}
NewFDecl = GenXIntrinsic::getGenXDeclaration(CI.getModule(), ID,
GenXOverloadedTypes);
}
Expand All @@ -1432,15 +1452,35 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
NewFDecl->getFnAttribute(llvm::Attribute::ReadNone).isValid();
if (FixReadNone)
NewFDecl->removeFnAttr(llvm::Attribute::ReadNone);
CallInst *NewCI = IntrinsicInst::Create(
NewFDecl, GenXArgs,
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
&CI);
if (FixReadNone)
NewCI->setMemoryEffects(MemoryEffects::none());
NewCI->setDebugLoc(CI.getDebugLoc());

Instruction *NewInst = addCastInstIfNeeded(&CI, NewCI);
Instruction *NewInst = nullptr;
if (IsStructureReturningFunction) {
AddrSpaceCastInst *a = static_cast<AddrSpaceCastInst *>(GenXArgs[0]);

GenXArgs.erase(GenXArgs.begin());
CallInst *NewCI = IntrinsicInst::Create(
NewFDecl, GenXArgs,
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
&CI);
if (FixReadNone)
NewCI->setMemoryEffects(MemoryEffects::none());
NewCI->setDebugLoc(CI.getDebugLoc());

IRBuilder<> Builder(&CI);

NewInst = Builder.CreateStore(
NewCI, Builder.CreateBitCast(a->getPointerOperand(),
NewCI->getType()->getPointerTo()));
} else {
CallInst *NewCI = IntrinsicInst::Create(
NewFDecl, GenXArgs,
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
&CI);
if (FixReadNone)
NewCI->setMemoryEffects(MemoryEffects::none());
NewCI->setDebugLoc(CI.getDebugLoc());

NewInst = addCastInstIfNeeded(&CI, NewCI);
}
CI.replaceAllUsesWith(NewInst);
CI.eraseFromParent();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,47 @@ __esimd_dpasw_nosrc0(__ESIMD_DNS::vector_type_t<T1, N1> src1,
}
#endif // !__SYCL_DEVICE_ONLY__

/// Add-with-carry intrinsic. In device compilation this is only a
/// declaration; in host compilation the body below emulates it.
/// @tparam T element type of the operands.
/// @tparam N number of elements in each operand.
/// @param src0 first addend.
/// @param src1 second addend.
/// @return a pair whose first member holds the carry values and whose second
///   member holds the sum values.
template <typename T, int N>
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
                         __ESIMD_DNS::vector_type_t<T, N>>
__esimd_addc(__ESIMD_DNS::vector_type_t<T, N> src0,
             __ESIMD_DNS::vector_type_t<T, N> src1)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else  // !__SYCL_DEVICE_ONLY__
{
  using RawVecT = __ESIMD_DNS::vector_type_t<T, N>;

  // Accumulate in 64-bit lanes so the carry out of the addition is kept.
  __ESIMD_NS::simd<uint64_t, N> Wide = __ESIMD_NS::simd<T, N>(src0);
  Wide += __ESIMD_NS::simd<T, N>(src1);

  // Reinterpret the wide sums as T-sized pieces and pick every other one.
  // NOTE(review): taking even pieces as the sum and odd pieces as the carry
  // presumably relies on T being 32 bits wide and a little-endian layout --
  // confirm.
  auto Pieces = Wide.template bit_cast_view<T>();
  __ESIMD_NS::simd<uint32_t, N> SumLow = Pieces.template select<N, 2>(0);
  __ESIMD_NS::simd<uint32_t, N> CarryOut = Pieces.template select<N, 2>(1);

  std::pair<RawVecT, RawVecT> Packed =
      std::make_pair(CarryOut.data(), SumLow.data());
  return Packed;
}
#endif // !__SYCL_DEVICE_ONLY__

/// Subtract-with-borrow intrinsic. In device compilation this is only a
/// declaration; in host compilation the body below emulates it.
/// @tparam T element type of the operands.
/// @tparam N number of elements in each operand.
/// @param src0 minuend.
/// @param src1 subtrahend.
/// @return a pair whose first member holds the borrow values and whose second
///   member holds the difference values.
template <typename T, int N>
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
                         __ESIMD_DNS::vector_type_t<T, N>>
__esimd_subb(__ESIMD_DNS::vector_type_t<T, N> src0,
             __ESIMD_DNS::vector_type_t<T, N> src1)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else  // !__SYCL_DEVICE_ONLY__
{
  using RawVecT = __ESIMD_DNS::vector_type_t<T, N>;

  // A borrow is flagged for the elements where the minuend is smaller than
  // the subtrahend.
  __ESIMD_NS::simd<uint32_t, N> BorrowOut =
      __ESIMD_NS::simd<T, N>(src0) < __ESIMD_NS::simd<T, N>(src1);

  // Subtract in 64-bit lanes, then keep every other T-sized piece.
  // NOTE(review): taking the even pieces as the difference presumably relies
  // on T being 32 bits wide and a little-endian layout -- confirm.
  __ESIMD_NS::simd<uint64_t, N> Wide = __ESIMD_NS::simd<T, N>(src0);
  Wide -= __ESIMD_NS::simd<T, N>(src1);
  auto Pieces = Wide.template bit_cast_view<T>();
  __ESIMD_NS::simd<uint32_t, N> DiffLow = Pieces.template select<N, 2>(0);

  std::pair<RawVecT, RawVecT> Packed =
      std::make_pair(BorrowOut.data(), DiffLow.data());
  return Packed;
}
#endif // !__SYCL_DEVICE_ONLY__

#undef __ESIMD_raw_vec_t
#undef __ESIMD_cpp_vec_t

Expand Down
74 changes: 74 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,80 @@ imul(T &rmd, T0 src0, T1 src1) {
return Res[0];
}

/// Add with carry (vector, vector).
/// @tparam N number of elements in the operands and the result.
/// @param carry output; overwritten with the first member of the pair
///   returned by __esimd_addc. Its incoming value is not read.
/// @param src0 first addend.
/// @param src1 second addend.
/// @return the second member of the pair returned by __esimd_addc.
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
     __ESIMD_NS::simd<uint32_t, N> src1) {
  auto CarryAndSum = __esimd_addc<uint32_t, N>(src0.data(), src1.data());

  carry = CarryAndSum.first;
  return CarryAndSum.second;
}

/// Add with carry (vector, scalar). The scalar is converted to an N-element
/// vector and the call is forwarded to the vector overload.
/// @tparam N number of elements in the vector operand and the result.
/// @param carry output carry vector, filled in by the forwarded call.
/// @param src0 first addend (vector).
/// @param src1 second addend (scalar).
/// @return the value returned by the forwarded call.
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
     uint32_t src1) {
  __ESIMD_NS::simd<uint32_t, N> WidenedSrc1 = src1;
  return addc(carry, src0, WidenedSrc1);
}

/// Add with carry (scalar, vector). The scalar is converted to an N-element
/// vector and the call is forwarded to the vector overload.
/// @tparam N number of elements in the vector operand and the result.
/// @param carry output carry vector, filled in by the forwarded call.
/// @param src0 first addend (scalar).
/// @param src1 second addend (vector).
/// @return the value returned by the forwarded call.
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
     __ESIMD_NS::simd<uint32_t, N> src1) {
  __ESIMD_NS::simd<uint32_t, N> WidenedSrc0 = src0;
  return addc(carry, WidenedSrc0, src1);
}

/// Add with carry (scalar, scalar). Wraps the operands in one-element vectors
/// and forwards to the vector overload.
/// @param carry output; overwritten with the carry produced by the addition.
///   Its incoming value is never read, so the caller may pass an
///   uninitialized variable.
/// @param src0 first addend.
/// @param src1 second addend.
/// @return element 0 of the vector returned by the forwarded call.
__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
  // The vector overload assigns its carry argument without reading it, so the
  // temporary is deliberately not seeded from `carry`: doing so would read a
  // possibly indeterminate value for no effect.
  __ESIMD_NS::simd<uint32_t, 1> CarryV;
  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
  __ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
  carry = CarryV[0];
  return Res[0];
}

/// Subtract with borrow (vector, vector).
/// @tparam N number of elements in the operands and the result.
/// @param borrow output; overwritten with the first member of the pair
///   returned by __esimd_subb. Its incoming value is not read.
/// @param src0 minuend.
/// @param src1 subtrahend.
/// @return the second member of the pair returned by __esimd_subb.
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
     __ESIMD_NS::simd<uint32_t, N> src1) {
  auto BorrowAndDiff = __esimd_subb<uint32_t, N>(src0.data(), src1.data());

  borrow = BorrowAndDiff.first;
  return BorrowAndDiff.second;
}

/// Subtract with borrow (vector, scalar). The scalar is converted to an
/// N-element vector and the call is forwarded to the vector overload.
/// @tparam N number of elements in the vector operand and the result.
/// @param borrow output borrow vector, filled in by the forwarded call.
/// @param src0 minuend (vector).
/// @param src1 subtrahend (scalar).
/// @return the value returned by the forwarded call.
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
     uint32_t src1) {
  __ESIMD_NS::simd<uint32_t, N> WidenedSrc1 = src1;
  return subb(borrow, src0, WidenedSrc1);
}

/// Subtract with borrow (scalar, vector). The scalar is converted to an
/// N-element vector and the call is forwarded to the vector overload.
/// @tparam N number of elements in the vector operand and the result.
/// @param borrow output borrow vector, filled in by the forwarded call.
/// @param src0 minuend (scalar).
/// @param src1 subtrahend (vector).
/// @return the value returned by the forwarded call.
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
     __ESIMD_NS::simd<uint32_t, N> src1) {
  __ESIMD_NS::simd<uint32_t, N> WidenedSrc0 = src0;
  return subb(borrow, WidenedSrc0, src1);
}

/// Subtract with borrow (scalar, scalar). Wraps the operands in one-element
/// vectors and forwards to the vector overload.
/// @param borrow output; overwritten with the borrow produced by the
///   subtraction. Its incoming value is never read, so the caller may pass an
///   uninitialized variable.
/// @param src0 minuend.
/// @param src1 subtrahend.
/// @return element 0 of the vector returned by the forwarded call.
__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
  // The vector overload assigns its borrow argument without reading it, so
  // the temporary is deliberately not seeded from `borrow`: doing so would
  // read a possibly indeterminate value for no effect.
  __ESIMD_NS::simd<uint32_t, 1> BorrowV;
  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
  __ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
  borrow = BorrowV[0];
  return Res[0];
}

/// Integral quotient (vector version)
/// @tparam T element type of the input and return vectors.
/// @tparam SZ size of the input and returned vectors.
Expand Down