Skip to content

Commit 006dcb7

Browse files
SC llvm team
authored and committed
Merged main:ccc471fe3eb8 into amd-gfx:a2e482cac6f7
Local branch amd-gfx a2e482c Merged main:3348b4688f57 into amd-gfx:2da171c79f54 Remote branch main ccc471f [clang][bytecode] Implement __builtin_reduce_and (llvm#118289)
2 parents a2e482c + ccc471f commit 006dcb7

File tree

15 files changed

+432
-68
lines changed

15 files changed

+432
-68
lines changed

clang/lib/AST/ByteCode/Integral.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,9 @@ template <unsigned Bits, bool Signed> class Integral final {
123123
APSInt toAPSInt() const {
124124
return APSInt(APInt(Bits, static_cast<uint64_t>(V), Signed), !Signed);
125125
}
126-
APSInt toAPSInt(unsigned BitWidth) const { return APSInt(toAPInt(BitWidth)); }
126+
APSInt toAPSInt(unsigned BitWidth) const {
127+
return APSInt(toAPInt(BitWidth), !Signed);
128+
}
127129
APInt toAPInt(unsigned BitWidth) const {
128130
if constexpr (Signed)
129131
return APInt(Bits, static_cast<uint64_t>(V), Signed)

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 64 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1695,32 +1695,71 @@ static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
16951695
assert(Arg.getFieldDesc()->isPrimitiveArray());
16961696

16971697
unsigned ID = Func->getBuiltinID();
1698-
if (ID == Builtin::BI__builtin_reduce_add) {
1699-
QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1700-
assert(Call->getType() == ElemType);
1701-
PrimType ElemT = *S.getContext().classify(ElemType);
1702-
unsigned NumElems = Arg.getNumElems();
1703-
1704-
INT_TYPE_SWITCH(ElemT, {
1705-
T Sum = Arg.atIndex(0).deref<T>();
1706-
unsigned BitWidth = Sum.bitWidth();
1707-
for (unsigned I = 1; I != NumElems; ++I) {
1708-
T Elem = Arg.atIndex(I).deref<T>();
1709-
if (T::add(Sum, Elem, BitWidth, &Sum)) {
1698+
QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1699+
assert(Call->getType() == ElemType);
1700+
PrimType ElemT = *S.getContext().classify(ElemType);
1701+
unsigned NumElems = Arg.getNumElems();
1702+
1703+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1704+
T Result = Arg.atIndex(0).deref<T>();
1705+
unsigned BitWidth = Result.bitWidth();
1706+
for (unsigned I = 1; I != NumElems; ++I) {
1707+
T Elem = Arg.atIndex(I).deref<T>();
1708+
T PrevResult = Result;
1709+
1710+
if (ID == Builtin::BI__builtin_reduce_add) {
1711+
if (T::add(Result, Elem, BitWidth, &Result)) {
17101712
unsigned OverflowBits = BitWidth + 1;
1711-
(void)handleOverflow(
1712-
S, OpPC,
1713-
(Sum.toAPSInt(OverflowBits) + Elem.toAPSInt(OverflowBits)));
1713+
(void)handleOverflow(S, OpPC,
1714+
(PrevResult.toAPSInt(OverflowBits) +
1715+
Elem.toAPSInt(OverflowBits)));
17141716
return false;
17151717
}
1718+
} else if (ID == Builtin::BI__builtin_reduce_mul) {
1719+
if (T::mul(Result, Elem, BitWidth, &Result)) {
1720+
unsigned OverflowBits = BitWidth * 2;
1721+
(void)handleOverflow(S, OpPC,
1722+
(PrevResult.toAPSInt(OverflowBits) *
1723+
Elem.toAPSInt(OverflowBits)));
1724+
return false;
1725+
}
1726+
1727+
} else if (ID == Builtin::BI__builtin_reduce_and) {
1728+
(void)T::bitAnd(Result, Elem, BitWidth, &Result);
1729+
} else {
1730+
llvm_unreachable("Unhandled vector reduce builtin");
17161731
}
1717-
pushInteger(S, Sum, Call->getType());
1718-
});
1732+
}
1733+
pushInteger(S, Result, Call->getType());
1734+
});
17191735

1736+
return true;
1737+
}
1738+
1739+
static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
1740+
const InterpFrame *Frame,
1741+
const Function *Func, const CallExpr *Call) {
1742+
assert(Call->getNumArgs() == 3);
1743+
Pointer DestPtr = getParam<Pointer>(Frame, 0);
1744+
const Pointer &SrcPtr = getParam<Pointer>(Frame, 1);
1745+
const APSInt &Size =
1746+
peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
1747+
assert(!Size.isSigned() && "memcpy and friends take an unsigned size");
1748+
1749+
if (DestPtr.isDummy() || SrcPtr.isDummy())
1750+
return false;
1751+
1752+
// If the size is zero, we treat this as always being a valid no-op.
1753+
if (Size.isZero()) {
1754+
S.Stk.push<Pointer>(DestPtr);
17201755
return true;
17211756
}
17221757

1723-
llvm_unreachable("Unsupported vector reduce builtin");
1758+
if (!DoBitCastPtr(S, OpPC, SrcPtr, DestPtr))
1759+
return false;
1760+
1761+
S.Stk.push<Pointer>(DestPtr);
1762+
return true;
17241763
}
17251764

17261765
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
@@ -2169,10 +2208,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
21692208
break;
21702209

21712210
case Builtin::BI__builtin_reduce_add:
2211+
case Builtin::BI__builtin_reduce_mul:
2212+
case Builtin::BI__builtin_reduce_and:
21722213
if (!interp__builtin_vector_reduce(S, OpPC, Frame, F, Call))
21732214
return false;
21742215
break;
21752216

2217+
case Builtin::BI__builtin_memcpy:
2218+
if (!interp__builtin_memcpy(S, OpPC, Frame, F, Call))
2219+
return false;
2220+
break;
2221+
21762222
default:
21772223
S.FFDiag(S.Current->getLocation(OpPC),
21782224
diag::note_invalid_subexpr_in_const_expr)

clang/test/AST/ByteCode/builtin-functions.cpp

Lines changed: 55 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -991,7 +991,6 @@ namespace BuiltinInImplicitCtor {
991991
static_assert(Foo.a == 0, "");
992992
}
993993

994-
995994
typedef double vector4double __attribute__((__vector_size__(32)));
996995
typedef float vector4float __attribute__((__vector_size__(16)));
997996
typedef long long vector4long __attribute__((__vector_size__(32)));
@@ -1035,3 +1034,58 @@ namespace RecuceAdd {
10351034
static_assert(reduceAddInt3 == 0);
10361035
#endif
10371036
}
1037+
1038+
namespace ReduceMul {
1039+
static_assert(__builtin_reduce_mul((vector4char){}) == 0);
1040+
static_assert(__builtin_reduce_mul((vector4char){1, 2, 3, 4}) == 24);
1041+
static_assert(__builtin_reduce_mul((vector4short){1, 2, 30, 40}) == 2400);
1042+
#ifndef __AVR__
1043+
static_assert(__builtin_reduce_mul((vector4int){10, 20, 300, 400}) == 24'000'000);
1044+
#endif
1045+
static_assert(__builtin_reduce_mul((vector4long){1000L, 2000L, 3000L, 4000L}) == 24'000'000'000'000L);
1046+
constexpr int reduceMulInt1 = __builtin_reduce_mul((vector4int){~(1 << (sizeof(int) * 8 - 1)), 1, 1, 2});
1047+
// both-error@-1 {{must be initialized by a constant expression}} \
1048+
// both-note@-1 {{outside the range of representable values of type 'int'}}
1049+
constexpr long long reduceMulLong1 = __builtin_reduce_mul((vector4long){~(1LL << (sizeof(long long) * 8 - 1)), 1, 1, 2});
1050+
// both-error@-1 {{must be initialized by a constant expression}} \
1051+
// both-note@-1 {{outside the range of representable values of type 'long long'}}
1052+
constexpr int reduceMulInt2 = __builtin_reduce_mul((vector4int){(1 << (sizeof(int) * 8 - 1)), 1, 1, 2});
1053+
// both-error@-1 {{must be initialized by a constant expression}} \
1054+
// both-note@-1 {{outside the range of representable values of type 'int'}}
1055+
constexpr long long reduceMulLong2 = __builtin_reduce_mul((vector4long){(1LL << (sizeof(long long) * 8 - 1)), 1, 1, 2});
1056+
// both-error@-1 {{must be initialized by a constant expression}} \
1057+
// both-note@-1 {{outside the range of representable values of type 'long long'}}
1058+
static_assert(__builtin_reduce_mul((vector4uint){~0U, 1, 1, 2}) ==
1059+
#ifdef __AVR__
1060+
0);
1061+
#else
1062+
(~0U - 1));
1063+
#endif
1064+
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);
1065+
}
1066+
1067+
namespace ReduceAnd {
1068+
static_assert(__builtin_reduce_and((vector4char){}) == 0);
1069+
static_assert(__builtin_reduce_and((vector4char){(char)0x11, (char)0x22, (char)0x44, (char)0x88}) == 0);
1070+
static_assert(__builtin_reduce_and((vector4short){(short)0x1111, (short)0x2222, (short)0x4444, (short)0x8888}) == 0);
1071+
static_assert(__builtin_reduce_and((vector4int){(int)0x11111111, (int)0x22222222, (int)0x44444444, (int)0x88888888}) == 0);
1072+
#if __INT_WIDTH__ == 32
1073+
static_assert(__builtin_reduce_and((vector4long){(long long)0x1111111111111111L, (long long)0x2222222222222222L, (long long)0x4444444444444444L, (long long)0x8888888888888888L}) == 0L);
1074+
static_assert(__builtin_reduce_and((vector4char){(char)-1, (char)~0x22, (char)~0x44, (char)~0x88}) == 0x11);
1075+
static_assert(__builtin_reduce_and((vector4short){(short)~0x1111, (short)-1, (short)~0x4444, (short)~0x8888}) == 0x2222);
1076+
static_assert(__builtin_reduce_and((vector4int){(int)~0x11111111, (int)~0x22222222, (int)-1, (int)~0x88888888}) == 0x44444444);
1077+
static_assert(__builtin_reduce_and((vector4long){(long long)~0x1111111111111111L, (long long)~0x2222222222222222L, (long long)~0x4444444444444444L, (long long)-1}) == 0x8888888888888888L);
1078+
static_assert(__builtin_reduce_and((vector4uint){0x11111111U, 0x22222222U, 0x44444444U, 0x88888888U}) == 0U);
1079+
static_assert(__builtin_reduce_and((vector4ulong){0x1111111111111111UL, 0x2222222222222222UL, 0x4444444444444444UL, 0x8888888888888888UL}) == 0L);
1080+
#endif
1081+
}
1082+
1083+
namespace BuiltinMemcpy {
1084+
constexpr int simple() {
1085+
int a = 12;
1086+
int b = 0;
1087+
__builtin_memcpy(&b, &a, sizeof(a));
1088+
return b;
1089+
}
1090+
static_assert(simple() == 12);
1091+
}

lldb/packages/Python/lldbsuite/test/lldbtest.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -865,13 +865,9 @@ def setUp(self):
865865
session_file = self.getLogBasenameForCurrentTest() + ".log"
866866
self.log_files.append(session_file)
867867

868-
# Python 3 doesn't support unbuffered I/O in text mode. Open buffered.
869-
self.session = encoded_file.open(session_file, "utf-8", mode="w")
870-
871868
# Optimistically set __errored__, __failed__, __expected__ to False
872869
# initially. If the test errored/failed, the session info
873-
# (self.session) is then dumped into a session specific file for
874-
# diagnosis.
870+
# is then dumped into a session specific file for diagnosis.
875871
self.__cleanup_errored__ = False
876872
self.__errored__ = False
877873
self.__failed__ = False
@@ -1235,20 +1231,25 @@ def dumpSessionInfo(self):
12351231
else:
12361232
prefix = "Success"
12371233

1234+
session_file = self.getLogBasenameForCurrentTest() + ".log"
1235+
1236+
# Python 3 doesn't support unbuffered I/O in text mode. Open buffered.
1237+
session = encoded_file.open(session_file, "utf-8", mode="w")
1238+
12381239
if not self.__unexpected__ and not self.__skipped__:
12391240
for test, traceback in pairs:
12401241
if test is self:
1241-
print(traceback, file=self.session)
1242+
print(traceback, file=session)
12421243

12431244
import datetime
12441245

12451246
print(
12461247
"Session info generated @",
12471248
datetime.datetime.now().ctime(),
1248-
file=self.session,
1249+
file=session,
12491250
)
1250-
self.session.close()
1251-
del self.session
1251+
session.close()
1252+
del session
12521253

12531254
# process the log files
12541255
if prefix != "Success" or lldbtest_config.log_success:

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 519958
19+
#define LLVM_MAIN_REVISION 519969
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/LineEditor/LineEditor.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ class LineEditor {
4141

4242
void saveHistory();
4343
void loadHistory();
44+
void setHistorySize(int size);
4445

4546
static std::string getDefaultHistoryPath(StringRef ProgName);
4647

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4882,9 +4882,19 @@ llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS,
48824882
}
48834883

48844884
static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
4885-
bool CondIsTrue,
4885+
unsigned Depth, bool CondIsTrue,
48864886
const Instruction *CxtI,
48874887
KnownFPClass &KnownFromContext) {
4888+
Value *A, *B;
4889+
if (Depth < MaxAnalysisRecursionDepth &&
4890+
(CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
4891+
: match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
4892+
computeKnownFPClassFromCond(V, A, Depth + 1, CondIsTrue, CxtI,
4893+
KnownFromContext);
4894+
computeKnownFPClassFromCond(V, B, Depth + 1, CondIsTrue, CxtI,
4895+
KnownFromContext);
4896+
return;
4897+
}
48884898
CmpInst::Predicate Pred;
48894899
Value *LHS;
48904900
uint64_t ClassVal = 0;
@@ -4925,13 +4935,13 @@ static KnownFPClass computeKnownFPClassFromContext(const Value *V,
49254935

49264936
BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
49274937
if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
4928-
computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, Q.CxtI,
4929-
KnownFromContext);
4938+
computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/true,
4939+
Q.CxtI, KnownFromContext);
49304940

49314941
BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
49324942
if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
4933-
computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, Q.CxtI,
4934-
KnownFromContext);
4943+
computeKnownFPClassFromCond(V, Cond, /*Depth=*/0, /*CondIsTrue=*/false,
4944+
Q.CxtI, KnownFromContext);
49354945
}
49364946
}
49374947

@@ -4953,8 +4963,8 @@ static KnownFPClass computeKnownFPClassFromContext(const Value *V,
49534963
if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
49544964
continue;
49554965

4956-
computeKnownFPClassFromCond(V, I->getArgOperand(0), /*CondIsTrue=*/true,
4957-
Q.CxtI, KnownFromContext);
4966+
computeKnownFPClassFromCond(V, I->getArgOperand(0), /*Depth=*/0,
4967+
/*CondIsTrue=*/true, Q.CxtI, KnownFromContext);
49584968
}
49594969

49604970
return KnownFromContext;
@@ -10117,7 +10127,7 @@ void llvm::findValuesAffectedByCondition(
1011710127

1011810128
if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
1011910129
AddAffected(X);
10120-
} else if (match(Cond, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
10130+
} else if (match(V, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
1012110131
AddCmpOperands(A, B);
1012210132

1012310133
// fcmp fneg(x), y

llvm/lib/LineEditor/LineEditor.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include <cstdio>
1818
#ifdef HAVE_LIBEDIT
1919
#include <histedit.h>
20+
constexpr int DefaultHistorySize = 800;
2021
#endif
2122

2223
using namespace llvm;
@@ -220,8 +221,8 @@ LineEditor::LineEditor(StringRef ProgName, StringRef HistoryPath, FILE *In,
220221
NULL); // Fix the delete key.
221222
::el_set(Data->EL, EL_CLIENTDATA, Data.get());
222223

224+
setHistorySize(DefaultHistorySize);
223225
HistEvent HE;
224-
::history(Data->Hist, &HE, H_SETSIZE, 800);
225226
::history(Data->Hist, &HE, H_SETUNIQUE, 1);
226227
loadHistory();
227228
}
@@ -248,6 +249,11 @@ void LineEditor::loadHistory() {
248249
}
249250
}
250251

252+
void LineEditor::setHistorySize(int size) {
253+
HistEvent HE;
254+
::history(Data->Hist, &HE, H_SETSIZE, size);
255+
}
256+
251257
std::optional<std::string> LineEditor::readLine() const {
252258
// Call el_gets to prompt the user and read the user's input.
253259
int LineLen = 0;
@@ -291,6 +297,7 @@ LineEditor::~LineEditor() {
291297

292298
void LineEditor::saveHistory() {}
293299
void LineEditor::loadHistory() {}
300+
void LineEditor::setHistorySize(int size) {}
294301

295302
std::optional<std::string> LineEditor::readLine() const {
296303
::fprintf(Data->Out, "%s", Prompt.c_str());

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41404,6 +41404,7 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4140441404
N->isOnlyUserOf(N.getOperand(0).getNode())) {
4140541405
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
4140641406
unsigned SrcOpcode = N0.getOpcode();
41407+
EVT OpVT = N0.getValueType();
4140741408
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
4140841409
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
4140941410
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
@@ -41421,13 +41422,23 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4142141422
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00);
4142241423
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01);
4142341424
}
41424-
EVT OpVT = N0.getValueType();
4142541425
return DAG.getBitcast(ShuffleVT,
4142641426
DAG.getNode(SrcOpcode, DL, OpVT,
4142741427
DAG.getBitcast(OpVT, LHS),
4142841428
DAG.getBitcast(OpVT, RHS)));
4142941429
}
4143041430
}
41431+
if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
41432+
OpVT.getScalarSizeInBits() ==
41433+
N0.getOperand(0).getScalarValueSizeInBits()) {
41434+
SDValue Op00 = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
41435+
SDValue Res =
41436+
N.getNumOperands() == 2
41437+
? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
41438+
: DAG.getNode(Opc, DL, ShuffleVT, Op00);
41439+
Res = DAG.getBitcast(N0.getOperand(0).getValueType(), Res);
41440+
return DAG.getBitcast(ShuffleVT, DAG.getNode(SrcOpcode, DL, OpVT, Res));
41441+
}
4143141442
}
4143241443
break;
4143341444
}

0 commit comments

Comments
 (0)