Skip to content

Commit 0a09893

Browse files
authored
[SYCL][ESIMD] Add support for addc and subb operations (#8758)
1 parent b135956 commit 0a09893

File tree

5 files changed

+486
-4
lines changed

5 files changed

+486
-4
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include "llvm/ADT/DenseSet.h"
2323
#include "llvm/ADT/SmallVector.h"
2424
#include "llvm/ADT/StringSwitch.h"
25-
#include "llvm/TargetParser/Triple.h"
2625
#include "llvm/Demangle/Demangle.h"
2726
#include "llvm/Demangle/ItaniumDemangle.h"
2827
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
@@ -36,6 +35,7 @@
3635
#include "llvm/Pass.h"
3736
#include "llvm/Support/ModRef.h"
3837
#include "llvm/Support/raw_ostream.h"
38+
#include "llvm/TargetParser/Triple.h"
3939

4040
#include <cctype>
4141
#include <cstring>
@@ -672,12 +672,21 @@ class ESIMDIntrinDescTable {
672672
{"slm_init", {"slm.init", {a(0)}}},
673673
{"bf_cvt", {"bf.cvt", {a(0)}}},
674674
{"tf32_cvt", {"tf32.cvt", {a(0)}}},
675+
{"addc", {"addc", {l(0)}}},
676+
{"subb", {"subb", {l(0)}}},
675677
{"bfn", {"bfn", {a(0), a(1), a(2), t(0)}}}};
676678
}
677679

678680
const IntrinTable &getTable() { return Table; }
679681
};
680682

683+
static bool isStructureReturningFunction(StringRef FunctionName) {
684+
return llvm::StringSwitch<bool>(FunctionName)
685+
.Case("addc", true)
686+
.Case("subb", true)
687+
.Default(false);
688+
}
689+
681690
// The C++11 "magic static" idiom to lazily initialize the ESIMD intrinsic table
682691
static const IntrinTable &getIntrinTable() {
683692
static ESIMDIntrinDescTable TheTable;
@@ -1418,6 +1427,8 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
14181427
SmallVector<Value *, 16> GenXArgs;
14191428
createESIMDIntrinsicArgs(Desc, GenXArgs, CI, FE);
14201429
Function *NewFDecl = nullptr;
1430+
bool DoesFunctionReturnStructure =
1431+
isStructureReturningFunction(Desc.GenXSpelling);
14211432
if (Desc.GenXSpelling.rfind("test.src.", 0) == 0) {
14221433
// Special case for testing purposes
14231434
NewFDecl = createTestESIMDDeclaration(Desc, GenXArgs, CI);
@@ -1426,8 +1437,17 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
14261437
GenXIntrinsic::getGenXIntrinsicPrefix() + Desc.GenXSpelling + Suffix);
14271438

14281439
SmallVector<Type *, 16> GenXOverloadedTypes;
1429-
if (GenXIntrinsic::isOverloadedRet(ID))
1430-
GenXOverloadedTypes.push_back(CI.getType());
1440+
if (GenXIntrinsic::isOverloadedRet(ID)) {
1441+
if (DoesFunctionReturnStructure) {
1442+
// TODO implement more generic handling of returned structure
1443+
// current code assumes that returned code has 2 members of the
1444+
// same type as arguments.
1445+
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
1446+
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
1447+
} else {
1448+
GenXOverloadedTypes.push_back(CI.getType());
1449+
}
1450+
}
14311451
for (unsigned i = 0; i < GenXArgs.size(); ++i)
14321452
if (GenXIntrinsic::isOverloadedArg(ID, i))
14331453
GenXOverloadedTypes.push_back(GenXArgs[i]->getType());
@@ -1441,15 +1461,34 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
14411461
NewFDecl->getFnAttribute(llvm::Attribute::ReadNone).isValid();
14421462
if (FixReadNone)
14431463
NewFDecl->removeFnAttr(llvm::Attribute::ReadNone);
1464+
Instruction *NewInst = nullptr;
1465+
AddrSpaceCastInst *CastInstruction = nullptr;
1466+
if (DoesFunctionReturnStructure) {
1467+
llvm::esimd::assert_and_diag(
1468+
isa<AddrSpaceCastInst>(GenXArgs[0]),
1469+
"Unexpected instruction for returning a structure from a function.");
1470+
CastInstruction = static_cast<AddrSpaceCastInst *>(GenXArgs[0]);
1471+
// Remove 1st argument that is used to return the structure
1472+
GenXArgs.erase(GenXArgs.begin());
1473+
}
1474+
14441475
CallInst *NewCI = IntrinsicInst::Create(
14451476
NewFDecl, GenXArgs,
14461477
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
14471478
&CI);
14481479
if (FixReadNone)
14491480
NewCI->setMemoryEffects(MemoryEffects::none());
14501481
NewCI->setDebugLoc(CI.getDebugLoc());
1482+
if (DoesFunctionReturnStructure) {
1483+
IRBuilder<> Builder(&CI);
1484+
1485+
NewInst = Builder.CreateStore(
1486+
NewCI, Builder.CreateBitCast(CastInstruction->getPointerOperand(),
1487+
NewCI->getType()->getPointerTo()));
1488+
} else {
1489+
NewInst = addCastInstIfNeeded(&CI, NewCI);
1490+
}
14511491

1452-
Instruction *NewInst = addCastInstIfNeeded(&CI, NewCI);
14531492
CI.replaceAllUsesWith(NewInst);
14541493
CI.eraseFromParent();
14551494
}

sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,47 @@ __esimd_dpasw_nosrc0(__ESIMD_DNS::vector_type_t<T1, N1> src1,
714714
}
715715
#endif // !__SYCL_DEVICE_ONLY__
716716

717+
template <typename T, int N>
718+
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
719+
__ESIMD_DNS::vector_type_t<T, N>>
720+
__esimd_addc(__ESIMD_DNS::vector_type_t<T, N> src0,
721+
__ESIMD_DNS::vector_type_t<T, N> src1)
722+
#ifdef __SYCL_DEVICE_ONLY__
723+
;
724+
#else // !__SYCL_DEVICE_ONLY__
725+
{
726+
__ESIMD_NS::simd<uint64_t, N> Result64 = __ESIMD_NS::simd<T, N>(src0);
727+
Result64 += __ESIMD_NS::simd<T, N>(src1);
728+
auto Result32 = Result64.template bit_cast_view<T>();
729+
__ESIMD_NS::simd<uint32_t, N> CarryV = Result32.template select<N, 2>(1);
730+
__ESIMD_NS::simd<uint32_t, N> ResV = Result32.template select<N, 2>(0);
731+
std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>>
732+
ReturnValue = std::make_pair(CarryV.data(), ResV.data());
733+
return ReturnValue;
734+
}
735+
#endif // !__SYCL_DEVICE_ONLY__
736+
737+
template <typename T, int N>
738+
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
739+
__ESIMD_DNS::vector_type_t<T, N>>
740+
__esimd_subb(__ESIMD_DNS::vector_type_t<T, N> src0,
741+
__ESIMD_DNS::vector_type_t<T, N> src1)
742+
#ifdef __SYCL_DEVICE_ONLY__
743+
;
744+
#else // !__SYCL_DEVICE_ONLY__
745+
{
746+
__ESIMD_NS::simd<uint64_t, N> Result64 = __ESIMD_NS::simd<T, N>(src0);
747+
Result64 -= __ESIMD_NS::simd<T, N>(src1);
748+
auto Result32 = Result64.template bit_cast_view<T>();
749+
__ESIMD_NS::simd<uint32_t, N> BorrowV =
750+
__ESIMD_NS::simd<T, N>(src0) < __ESIMD_NS::simd<T, N>(src1);
751+
__ESIMD_NS::simd<uint32_t, N> ResV = Result32.template select<N, 2>(0);
752+
std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>>
753+
ReturnValue = std::make_pair(BorrowV.data(), ResV.data());
754+
return ReturnValue;
755+
}
756+
#endif // !__SYCL_DEVICE_ONLY__
757+
717758
template <uint8_t FuncControl, typename T, int N>
718759
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
719760
__esimd_bfn(__ESIMD_raw_vec_t(T, N) src0, __ESIMD_raw_vec_t(T, N) src1,

sycl/include/sycl/ext/intel/experimental/esimd/math.hpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,80 @@ imul(T &rmd, T0 src0, T1 src1) {
496496
return Res[0];
497497
}
498498

499+
template <int N>
500+
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
501+
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
502+
__ESIMD_NS::simd<uint32_t, N> src1) {
503+
std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
504+
__ESIMD_DNS::vector_type_t<uint32_t, N>>
505+
Result = __esimd_addc<uint32_t, N>(src0.data(), src1.data());
506+
507+
carry = Result.first;
508+
return Result.second;
509+
}
510+
511+
template <int N>
512+
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
513+
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
514+
uint32_t src1) {
515+
__ESIMD_NS::simd<uint32_t, N> Src1V = src1;
516+
return addc(carry, src0, Src1V);
517+
}
518+
519+
template <int N>
520+
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
521+
addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
522+
__ESIMD_NS::simd<uint32_t, N> src1) {
523+
__ESIMD_NS::simd<uint32_t, N> Src0V = src0;
524+
return addc(carry, Src0V, src1);
525+
}
526+
527+
__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
528+
__ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
529+
__ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
530+
__ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
531+
__ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
532+
carry = CarryV[0];
533+
return Res[0];
534+
}
535+
536+
template <int N>
537+
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
538+
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
539+
__ESIMD_NS::simd<uint32_t, N> src1) {
540+
std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
541+
__ESIMD_DNS::vector_type_t<uint32_t, N>>
542+
Result = __esimd_subb<uint32_t, N>(src0.data(), src1.data());
543+
544+
borrow = Result.first;
545+
return Result.second;
546+
}
547+
548+
template <int N>
549+
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
550+
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
551+
uint32_t src1) {
552+
__ESIMD_NS::simd<uint32_t, N> Src1V = src1;
553+
return subb(borrow, src0, Src1V);
554+
}
555+
556+
template <int N>
557+
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
558+
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
559+
__ESIMD_NS::simd<uint32_t, N> src1) {
560+
__ESIMD_NS::simd<uint32_t, N> Src0V = src0;
561+
return subb(borrow, Src0V, src1);
562+
}
563+
564+
__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
565+
__ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
566+
__ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
567+
__ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
568+
__ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
569+
borrow = BorrowV[0];
570+
return Res[0];
571+
}
572+
499573
/// Integral quotient (vector version)
500574
/// @tparam T element type of the input and return vectors.
501575
/// @tparam SZ size of the input and returned vectors.

sycl/test-e2e/ESIMD/addc.cpp

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//==---------------- addc.cpp - DPC++ ESIMD on-device test ---------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
13+
// The test verifies ESIMD API that adds 2 32-bit integer scalars/vectors with
14+
// carry returning the result as 2 parts: carry flag the input modified operand
15+
// and addition result as return from function.
16+
17+
#include "esimd_test_utils.hpp"
18+
19+
#include <iostream>
20+
#include <sycl/ext/intel/esimd.hpp>
21+
#include <sycl/sycl.hpp>
22+
23+
using namespace sycl;
24+
using namespace sycl::ext::intel::esimd;
25+
namespace iesimd = sycl::ext::intel::experimental::esimd;
26+
27+
template <int N, bool AIsVector, bool BIsVector> bool test(sycl::queue Q) {
28+
static_assert(AIsVector || BIsVector || N == 1,
29+
"(Scalar + Scalar) case must have N==1");
30+
31+
uint32_t ValuesToTryHost[] = {0,
32+
1,
33+
static_cast<uint32_t>(-1),
34+
0x7f,
35+
static_cast<uint32_t>(-0x7f),
36+
0x7fff,
37+
static_cast<uint32_t>(-0x7fff),
38+
0x7ffff,
39+
static_cast<uint32_t>(-0x7ffff),
40+
0x7ffffff,
41+
0x80,
42+
static_cast<uint32_t>(-0x80),
43+
0x8000,
44+
static_cast<uint32_t>(-0x8000),
45+
0x800000,
46+
static_cast<uint32_t>(-0x800000),
47+
0x80000000};
48+
uint32_t ValuesToTrySize = sizeof(ValuesToTryHost) / sizeof(uint32_t);
49+
50+
std::cout << "Running case: N=" << N << ", AIsVector=" << AIsVector
51+
<< ", BIsVector=" << BIsVector << std::endl;
52+
53+
auto ValuesToTryUPtr =
54+
esimd_test::usm_malloc_shared<uint32_t>(Q, ValuesToTrySize);
55+
uint32_t *ValuesToTryPtr = ValuesToTryUPtr.get();
56+
memcpy(ValuesToTryPtr, ValuesToTryHost, ValuesToTrySize * sizeof(uint32_t));
57+
58+
auto ResultsMatrixUPtr = esimd_test::usm_malloc_shared<uint32_t>(
59+
Q, ValuesToTrySize * ValuesToTrySize * N);
60+
auto CarryMatrixUPtr = esimd_test::usm_malloc_shared<uint32_t>(
61+
Q, ValuesToTrySize * ValuesToTrySize * N);
62+
uint32_t *ResultsMatrixPtr = ResultsMatrixUPtr.get();
63+
uint32_t *CarryMatrixPtr = CarryMatrixUPtr.get();
64+
65+
try {
66+
Q.single_task([=]() SYCL_ESIMD_KERNEL {
67+
simd<uint32_t, N> VecInc(0, 1);
68+
for (int AI = 0; AI < ValuesToTrySize; AI++) {
69+
using AType =
70+
std::conditional_t<AIsVector, simd<uint32_t, N>, uint32_t>;
71+
uint32_t AScalar = simd<uint32_t, 1>(
72+
reinterpret_cast<uint32_t *>(ValuesToTryPtr) + AI)[0];
73+
AType A = AScalar;
74+
if constexpr (AIsVector)
75+
A += VecInc;
76+
77+
for (int BI = 0; BI < ValuesToTrySize; BI++) {
78+
using BType =
79+
std::conditional_t<BIsVector, simd<uint32_t, N>, uint32_t>;
80+
uint32_t BScalar = simd<uint32_t, 1>(
81+
reinterpret_cast<uint32_t *>(ValuesToTryPtr) + BI)[0];
82+
BType B = BScalar;
83+
if constexpr (BIsVector)
84+
B += VecInc;
85+
86+
using ResType = std::conditional_t<AIsVector || BIsVector,
87+
simd<uint32_t, N>, uint32_t>;
88+
ResType Carry = 0;
89+
ResType Res = iesimd::addc(Carry, A, B);
90+
91+
if constexpr (AIsVector || BIsVector) {
92+
Carry.copy_to(CarryMatrixPtr + (ValuesToTrySize * AI + BI) * N);
93+
Res.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N);
94+
} else {
95+
simd<uint32_t, 1> Carry1 = Carry;
96+
simd<uint32_t, 1> Res1 = Res;
97+
Carry1.copy_to(CarryMatrixPtr + (ValuesToTrySize * AI + BI) * N);
98+
Res1.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N);
99+
}
100+
101+
} // end for BI
102+
} // end for AI
103+
}).wait();
104+
} catch (sycl::exception const &e) {
105+
std::cout << "SYCL exception caught: " << e.what() << '\n';
106+
return 1;
107+
}
108+
109+
using Result64T = uint64_t;
110+
int NumErrors = 0;
111+
for (int AI = 0; AI < ValuesToTrySize; AI++) {
112+
for (int BI = 0; BI < ValuesToTrySize; BI++) {
113+
for (int I = 0; I < N; I++) {
114+
uint32_t A = ValuesToTryHost[AI];
115+
if constexpr (AIsVector)
116+
A += I;
117+
uint32_t B = ValuesToTryHost[BI];
118+
if constexpr (BIsVector)
119+
B += I;
120+
Result64T R = static_cast<uint32_t>(A);
121+
R += static_cast<uint32_t>(B);
122+
123+
uint32_t ExpectedRes = R & 0xffffffff;
124+
uint32_t ExpectedCarry = (R >> 32) & 0xffffffff;
125+
uint32_t ComputedRes =
126+
ResultsMatrixPtr[(AI * ValuesToTrySize + BI) * N + I];
127+
uint32_t ComputedCarry =
128+
CarryMatrixPtr[(AI * ValuesToTrySize + BI) * N + I];
129+
if (ComputedRes != ExpectedRes || ComputedCarry != ExpectedCarry) {
130+
std::cout << "Error for (" << AI << "," << BI << "): " << A << " + "
131+
<< B << " is Computed(" << ComputedCarry << ","
132+
<< ComputedRes << ") != Expected (" << ExpectedCarry << ","
133+
<< ExpectedRes << "), R = " << R << "\n";
134+
NumErrors++;
135+
}
136+
}
137+
}
138+
}
139+
140+
return NumErrors == 0;
141+
}
142+
143+
int main() {
144+
queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
145+
auto D = Q.get_device();
146+
std::cout << "Running on " << D.get_info<info::device::name>() << "\n";
147+
148+
constexpr bool AIsVector = true;
149+
constexpr bool BIsVector = true;
150+
bool Pass = true;
151+
Pass &= test<16, AIsVector, BIsVector>(Q);
152+
Pass &= test<8, AIsVector, !BIsVector>(Q);
153+
Pass &= test<4, !AIsVector, BIsVector>(Q);
154+
155+
Pass &= test<1, AIsVector, BIsVector>(Q);
156+
Pass &= test<1, AIsVector, !BIsVector>(Q);
157+
Pass &= test<1, !AIsVector, BIsVector>(Q);
158+
159+
Pass &= test<1, !AIsVector, !BIsVector>(Q);
160+
161+
std::cout << (Pass > 0 ? "Passed\n" : "FAILED\n");
162+
return Pass ? 0 : 1;
163+
}

0 commit comments

Comments
 (0)