-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][Interp] Implement __builtin_bit_cast #68288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang ChangesFrom https://reviews.llvm.org/D154951 - which is never gonna make it out of Phabricator anyway. The tests are failing because https://reviews.llvm.org/D154581 is not pushed. Aaron's review mentioned that we should support bitfields before pushing this. Patch is 56.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68288.diff 13 Files Affected:
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index fe3f8c485ec1c56..5a188b583022bda 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -74,6 +74,7 @@ add_clang_library(clangAST
Interp/Frame.cpp
Interp/Function.cpp
Interp/InterpBuiltin.cpp
+ Interp/InterpBitcast.cpp
Interp/Floating.cpp
Interp/Interp.cpp
Interp/InterpBlock.cpp
diff --git a/clang/lib/AST/Interp/Boolean.h b/clang/lib/AST/Interp/Boolean.h
index c3ed3d61f76ca1c..d395264ab2de448 100644
--- a/clang/lib/AST/Interp/Boolean.h
+++ b/clang/lib/AST/Interp/Boolean.h
@@ -9,14 +9,15 @@
#ifndef LLVM_CLANG_AST_INTERP_BOOLEAN_H
#define LLVM_CLANG_AST_INTERP_BOOLEAN_H
-#include <cstddef>
-#include <cstdint>
#include "Integral.h"
#include "clang/AST/APValue.h"
+#include "clang/AST/ASTContext.h"
#include "clang/AST/ComparisonCategories.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
namespace clang {
namespace interp {
@@ -65,6 +66,9 @@ class Boolean final {
Boolean toUnsigned() const { return *this; }
constexpr static unsigned bitWidth() { return 1; }
+ constexpr static unsigned objectReprBits() { return 8; }
+ constexpr static unsigned valueReprBytes(const ASTContext &Ctx) { return 1; }
+
bool isZero() const { return !V; }
bool isMin() const { return isZero(); }
@@ -106,6 +110,13 @@ class Boolean final {
return Boolean(!Value.isZero());
}
+ static Boolean bitcastFromMemory(const std::byte *Buff) {
+ bool Val = static_cast<bool>(*Buff);
+ return Boolean(Val);
+ }
+
+ void bitcastToMemory(std::byte *Buff) { std::memcpy(Buff, &V, sizeof(V)); }
+
static Boolean zero() { return from(false); }
template <typename T>
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 3cf8bf874b1d210..8cc010d6f100ccb 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -67,6 +67,74 @@ template <class Emitter> class OptionScope final {
} // namespace interp
} // namespace clang
+// This function is constexpr if and only if To, From, and the types of
+// all subobjects of To and From are types T such that...
+// (3.1) - is_union_v<T> is false;
+// (3.2) - is_pointer_v<T> is false;
+// (3.3) - is_member_pointer_v<T> is false;
+// (3.4) - is_volatile_v<T> is false; and
+// (3.5) - T has no non-static data members of reference type
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
+ const Expr *SubExpr = E->getSubExpr();
+ QualType FromType = SubExpr->getType();
+ QualType ToType = E->getType();
+ std::optional<PrimType> ToT = classify(ToType);
+
+ // FIXME: This is wrong. We need to do the bitcast and then
+ // throw away the result, so we still get the diagnostics.
+ if (DiscardResult)
+ return this->discard(SubExpr);
+
+ if (ToType->isNullPtrType()) {
+ if (!this->discard(SubExpr))
+ return false;
+
+ return this->emitNullPtr(E);
+ }
+
+ if (FromType->isNullPtrType() && ToT) {
+ if (!this->discard(SubExpr))
+ return false;
+
+ return visitZeroInitializer(ToType, E);
+ }
+ assert(!ToType->isReferenceType());
+
+ // Get a pointer to the value-to-cast on the stack.
+ if (!this->visit(SubExpr))
+ return false;
+
+ if (!ToT || ToT == PT_Ptr) {
+ // Conversion to an array or record type.
+ return this->emitBitCastPtr(E);
+ }
+
+ assert(ToT);
+
+ // Conversion to a primitive type. FromType can be another
+ // primitive type, or a record/array.
+ //
+ // Same thing for floats, but we need the target
+ // semantics here.
+ if (ToT == PT_Float) {
+ const auto *TargetSemantics = &Ctx.getFloatSemantics(ToType);
+ CharUnits FloatSize = Ctx.getASTContext().getTypeSizeInChars(ToType);
+ return this->emitBitCastFP(TargetSemantics, FloatSize.getQuantity(), E);
+ }
+
+ bool ToTypeIsUChar = (ToType->isSpecificBuiltinType(BuiltinType::UChar) ||
+ ToType->isSpecificBuiltinType(BuiltinType::Char_U));
+
+ if (!this->emitBitCast(*ToT, ToTypeIsUChar || ToType->isStdByteType(), E))
+ return false;
+
+ if (DiscardResult)
+ return this->emitPop(*ToT, E);
+
+ return true;
+}
+
template <class Emitter>
bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
auto *SubExpr = CE->getSubExpr();
@@ -87,6 +155,9 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
});
}
+ case CK_LValueToRValueBitCast:
+ return this->emitBuiltinBitCast(CE);
+
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
if (!this->visit(SubExpr))
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 47a3f75f13459d0..8f04c4809ccf41d 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -280,6 +280,7 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
bool emitRecordDestruction(const Descriptor *Desc);
unsigned collectBaseOffset(const RecordType *BaseType,
const RecordType *DerivedType);
+ bool emitBuiltinBitCast(const CastExpr *E);
protected:
/// Variable to storage mapping.
diff --git a/clang/lib/AST/Interp/Floating.h b/clang/lib/AST/Interp/Floating.h
index a22b3fa79f3992f..727d37520047b77 100644
--- a/clang/lib/AST/Interp/Floating.h
+++ b/clang/lib/AST/Interp/Floating.h
@@ -15,6 +15,7 @@
#include "Primitives.h"
#include "clang/AST/APValue.h"
+#include "clang/AST/ASTContext.h"
#include "llvm/ADT/APFloat.h"
namespace clang {
@@ -84,6 +85,12 @@ class Floating final {
}
unsigned bitWidth() const { return F.semanticsSizeInBits(F.getSemantics()); }
+ unsigned objectReprBits() { return F.semanticsSizeInBits(F.getSemantics()); }
+
+ unsigned valueReprBytes(const ASTContext &Ctx) {
+ return Ctx.toCharUnitsFromBits(F.semanticsSizeInBits(F.getSemantics()))
+ .getQuantity();
+ }
bool isSigned() const { return true; }
bool isNegative() const { return F.isNegative(); }
@@ -133,6 +140,10 @@ class Floating final {
return Floating(APFloat(Sem, API));
}
+ void bitcastToMemory(std::byte *Buff) {
+ llvm::APInt API = F.bitcastToAPInt();
+ llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8);
+ }
// === Serialization support ===
size_t bytesToSerialize() const {
diff --git a/clang/lib/AST/Interp/Integral.h b/clang/lib/AST/Interp/Integral.h
index 4dbe9c9bcb14b43..9c78bb04876ea02 100644
--- a/clang/lib/AST/Interp/Integral.h
+++ b/clang/lib/AST/Interp/Integral.h
@@ -13,8 +13,9 @@
#ifndef LLVM_CLANG_AST_INTERP_INTEGRAL_H
#define LLVM_CLANG_AST_INTERP_INTEGRAL_H
-#include "clang/AST/ComparisonCategories.h"
#include "clang/AST/APValue.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/ComparisonCategories.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -116,6 +117,10 @@ template <unsigned Bits, bool Signed> class Integral final {
}
constexpr static unsigned bitWidth() { return Bits; }
+ constexpr static unsigned objectReprBits() { return Bits; }
+ constexpr static unsigned valueReprBytes(const ASTContext &Ctx) {
+ return Ctx.toCharUnitsFromBits(Bits).getQuantity();
+ }
bool isZero() const { return !V; }
@@ -182,6 +187,18 @@ template <unsigned Bits, bool Signed> class Integral final {
return Integral(Value);
}
+ static Integral bitcastFromMemory(const std::byte *Buff) {
+ ReprT V;
+
+ std::memcpy(&V, Buff, sizeof(ReprT));
+ return Integral(V);
+ }
+
+ void bitcastToMemory(std::byte *Buff) const {
+ assert(Buff);
+ std::memcpy(Buff, &V, sizeof(ReprT));
+ }
+
static bool inRange(int64_t Value, unsigned NumBits) {
return CheckRange<ReprT, Min, Max>(Value);
}
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 8e851595963184c..d2c65daafc7d135 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -576,7 +576,24 @@ bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR) {
return false;
}
}
+ return false;
+}
+
+bool CheckBitcast(InterpState &S, CodePtr OpPC, unsigned IndeterminateBits,
+ bool TargetIsUCharOrByte) {
+
+ // This is always fine.
+ if (IndeterminateBits == 0)
+ return true;
+
+ // Indeterminate bits can only be bitcast to unsigned char or std::byte.
+ if (TargetIsUCharOrByte)
+ return true;
+ const Expr *E = S.Current->getExpr(OpPC);
+ QualType ExprType = E->getType();
+ S.FFDiag(E, diag::note_constexpr_bit_cast_indet_dest)
+ << ExprType << S.getLangOpts().CharIsSigned << E->getSourceRange();
return false;
}
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index d62e64bedb213ac..c14f890e188ecdb 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -183,6 +183,9 @@ bool CheckFloatResult(InterpState &S, CodePtr OpPC, const Floating &Result,
/// Checks why the given DeclRefExpr is invalid.
bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR);
+bool CheckBitcast(InterpState &S, CodePtr OpPC, unsigned IndeterminateBits,
+ bool TargetIsUCharOrByte);
+
/// Interpreter entry point.
bool Interpret(InterpState &S, APValue &Result);
@@ -194,6 +197,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
llvm::ArrayRef<int64_t> ArrayIndices, int64_t &Result);
+/// Perform a bitcast of all fields of P into Buff. This performs the
+/// actions of a __builtin_bit_cast expression when the target type
+/// is primitive.
+bool DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &P, std::byte *Buff,
+ size_t BuffSize, unsigned &IndeterminateBits);
+
+/// Perform a bitcast of all fields of P into the fields of DestPtr.
+/// This performs the actions of a __builtin_bit_cast expression when
+/// the target type is a composite type.
+bool DoBitCastToPtr(InterpState &S, const Pointer &P, Pointer &DestPtr,
+ CodePtr PC);
+
enum class ArithOp { Add, Sub };
//===----------------------------------------------------------------------===//
@@ -1557,6 +1572,57 @@ template <PrimType TIn, PrimType TOut> bool Cast(InterpState &S, CodePtr OpPC) {
return true;
}
+template <PrimType Name, class ToT = typename PrimConv<Name>::T>
+bool BitCast(InterpState &S, CodePtr OpPC, bool TargetIsUCharOrByte) {
+ const Pointer &FromPtr = S.Stk.pop<Pointer>();
+
+ size_t BuffSize = ToT::valueReprBytes(S.getCtx());
+ std::vector<std::byte> Buff(BuffSize);
+ unsigned IndeterminateBits = 0;
+
+ if (!DoBitCast(S, OpPC, FromPtr, Buff.data(), BuffSize, IndeterminateBits))
+ return false;
+
+ if (!CheckBitcast(S, OpPC, IndeterminateBits, TargetIsUCharOrByte))
+ return false;
+
+ S.Stk.push<ToT>(ToT::bitcastFromMemory(Buff.data()));
+ return true;
+}
+
+/// Bitcast TO a float.
+inline bool BitCastFP(InterpState &S, CodePtr OpPC,
+ const llvm::fltSemantics *Sem, uint32_t TargetSize) {
+ const Pointer &FromPtr = S.Stk.pop<Pointer>();
+
+ std::vector<std::byte> Buff(TargetSize);
+ unsigned IndeterminateBits = 0;
+
+ if (!DoBitCast(S, OpPC, FromPtr, Buff.data(), TargetSize, IndeterminateBits))
+ return false;
+
+ if (!CheckBitcast(S, OpPC, IndeterminateBits, /*TargetIsUCharOrByte=*/false))
+ return false;
+
+ S.Stk.push<Floating>(Floating::bitcastFromMemory(Buff.data(), *Sem));
+ return true;
+}
+
+/// 1) Pops a pointer from the stack
+/// 2) Peeks a pointer
+/// 3) Bitcasts the contents of the first pointer to the
+/// fields of the second pointer.
+inline bool BitCastPtr(InterpState &S, CodePtr OpPC) {
+ const Pointer &FromPtr = S.Stk.pop<Pointer>();
+ Pointer &ToPtr = S.Stk.peek<Pointer>();
+
+ // FIXME: We should CheckLoad() for FromPtr and ToPtr here, I think.
+ if (!DoBitCastToPtr(S, FromPtr, ToPtr, OpPC))
+ return false;
+
+ return true;
+}
+
/// 1) Pops a Floating from the stack.
/// 2) Pushes a new floating on the stack that uses the given semantics.
inline bool CastFP(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem,
diff --git a/clang/lib/AST/Interp/InterpBitcast.cpp b/clang/lib/AST/Interp/InterpBitcast.cpp
new file mode 100644
index 000000000000000..91326fc8e788080
--- /dev/null
+++ b/clang/lib/AST/Interp/InterpBitcast.cpp
@@ -0,0 +1,482 @@
+//===--- InterpBitcast.cpp - Interpreter for the constexpr VM ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "Boolean.h"
+#include "Interp.h"
+#include "PrimType.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetInfo.h"
+
+namespace clang {
+namespace interp {
+
+// TODO: Try to e-duplicate the primitive and composite versions.
+
+/// Used to iterate over pointer fields.
+using DataFunc =
+ llvm::function_ref<bool(const Pointer &P, PrimType Ty, size_t Offset)>;
+
+#define BITCAST_TYPE_SWITCH(Expr, B) \
+ do { \
+ switch (Expr) { \
+ TYPE_SWITCH_CASE(PT_Sint8, B) \
+ TYPE_SWITCH_CASE(PT_Uint8, B) \
+ TYPE_SWITCH_CASE(PT_Sint16, B) \
+ TYPE_SWITCH_CASE(PT_Uint16, B) \
+ TYPE_SWITCH_CASE(PT_Sint32, B) \
+ TYPE_SWITCH_CASE(PT_Uint32, B) \
+ TYPE_SWITCH_CASE(PT_Sint64, B) \
+ TYPE_SWITCH_CASE(PT_Uint64, B) \
+ TYPE_SWITCH_CASE(PT_Bool, B) \
+ default: \
+ llvm_unreachable("Unhandled bitcast type"); \
+ } \
+ } while (0)
+
+/// Float is a special case that sometimes needs the floating point semantics
+/// to be available.
+#define BITCAST_TYPE_SWITCH_WITH_FLOAT(Expr, B) \
+ do { \
+ switch (Expr) { \
+ TYPE_SWITCH_CASE(PT_Sint8, B) \
+ TYPE_SWITCH_CASE(PT_Uint8, B) \
+ TYPE_SWITCH_CASE(PT_Sint16, B) \
+ TYPE_SWITCH_CASE(PT_Uint16, B) \
+ TYPE_SWITCH_CASE(PT_Sint32, B) \
+ TYPE_SWITCH_CASE(PT_Uint32, B) \
+ TYPE_SWITCH_CASE(PT_Sint64, B) \
+ TYPE_SWITCH_CASE(PT_Uint64, B) \
+ TYPE_SWITCH_CASE(PT_Bool, B) \
+ TYPE_SWITCH_CASE(PT_Float, B) \
+ default: \
+ llvm_unreachable("Unhandled bitcast type"); \
+ } \
+ } while (0)
+
+/// Rotate things around for big endian targets.
+static void swapBytes(std::byte *M, size_t N) {
+ for (size_t I = 0; I != (N / 2); ++I)
+ std::swap(M[I], M[N - 1 - I]);
+}
+
+/// Track what bytes have been initialized to known values and which ones
+/// have indeterminate value.
+/// All offsets are in bytes.
+struct ByteTracker {
+ std::vector<bool> Initialized;
+ std::vector<std::byte> Data;
+
+ ByteTracker() = default;
+
+ size_t size() const {
+ assert(Initialized.size() == Data.size());
+ return Initialized.size();
+ }
+
+ std::byte *getBytes(size_t Offset) { return Data.data() + Offset; }
+ bool allInitialized(size_t Offset, size_t Size) const {
+ for (size_t I = Offset; I != (Size + Offset); ++I) {
+ if (!Initialized[I])
+ return false;
+ }
+ return true;
+ }
+
+ std::byte *getWritableBytes(size_t Offset, size_t Size, bool InitValue) {
+ assert(Offset >= Data.size());
+ assert(Size > 0);
+
+ size_t OldSize = Data.size();
+ Data.resize(Offset + Size);
+
+ // Everything from the old size to the new offset is indeterminate.
+ for (size_t I = OldSize; I != Offset; ++I)
+ Initialized.push_back(false);
+ for (size_t I = Offset; I != Offset + Size; ++I)
+ Initialized.push_back(InitValue);
+
+ return Data.data() + Offset;
+ }
+
+ void markUninitializedUntil(size_t Offset) {
+ assert(Offset >= Data.size());
+
+ size_t NBytes = Offset - Data.size();
+ for (size_t I = 0; I != NBytes; ++I)
+ Initialized.push_back(false);
+ Data.resize(Offset);
+ }
+
+ void zeroUntil(size_t Offset) {
+ assert(Offset >= Data.size());
+
+ assert(Data.size() == Initialized.size());
+ size_t NBytes = Offset - Data.size();
+ for (size_t I = 0; I != NBytes; ++I) {
+ Initialized.push_back(true);
+ Data.push_back(std::byte{0});
+ }
+ }
+};
+
+struct BitcastBuffer {
+ std::byte *Buff;
+ size_t ByteOffset = 0;
+ size_t Offset = 0;
+ size_t BuffSize;
+ unsigned IndeterminateBits = 0;
+ bool BigEndian;
+
+ constexpr BitcastBuffer(std::byte *Buff, size_t BuffSize, bool BigEndian)
+ : Buff(Buff), BuffSize(BuffSize), BigEndian(BigEndian) {}
+
+ std::byte *getBytes(size_t ByteOffset, size_t N) {
+ assert(ByteOffset >= this->ByteOffset && "we don't support stepping back");
+
+ // All untouched bits before the requested bit offset
+ // are indeterminate values. This will be important later,
+ // because they can't be read into non-uchar/non-std::byte
+ // values.
+ IndeterminateBits += (ByteOffset - this->ByteOffset);
+
+ size_t OldOffset = this->Offset;
+
+ this->Offset += N;
+ this->ByteOffset = ByteOffset + N;
+
+ if (BigEndian)
+ return Buff + BuffSize - OldOffset - N;
+
+ // Little Endian target.
+ return Buff + OldOffset;
+ }
+};
+
+/// We use this to recursively iterate over all fields and elemends of a pointer
+/// and extract relevant data for a bitcast.
+static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset,
+ DataFunc F) {
+ const Descriptor *FieldDesc = P.getFieldDesc();
+ assert(FieldDesc);
+
+ // Primitives.
+ if (FieldDesc->isPrimitive())
+ return F(P, *Ctx.classify(FieldDesc->getType()), Offset);
+
+ // Primitive arrays.
+ if (FieldDesc->isPrimitiveArray()) {
+ QualType ElemType =
+ FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType();
+ size_t ElemSize =
+ Ctx.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
+ PrimType ElemT = *Ctx.classify(ElemType);
+ for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) {
+ if (!F(P.atIndex(I), ElemT, Offset))
+ return false;
+ Offset += ElemSize;
+ }
+ return true;
+ }
+
+ // Composite arrays.
+ if (FieldDesc->isCompositeArray()) {
+ QualType ElemType =
+ FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType();
+ size_t ElemSize =
+ Ctx.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
+ for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) {
+ enumerateData(P.atIndex(I).narrow(), Ctx, Offset, F);
+ Offset += ElemSize;
+ }
+ return true;
+ }
+
+ // Records.
+ if (FieldDesc->isRecord()) {
+ const Record *R = FieldDesc->ElemRecord;
+ const ...
[truncated]
|
Ping |
@AaronBallman This is still in a rough WIP state, but I've added support for bitcasts with bitfields involved, take a look at the tests if you have some time. |
constexpr static unsigned valueReprBytes(const ASTContext &Ctx) { return 1; } | ||
constexpr static unsigned valueReprBits(const ASTContext &Ctx) { | ||
return 8; | ||
} // FIXME: Is this correct? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://eel.is/c++draft/basic#fundamental-10 -- so it's implementation-defined how many bits are in the value representation; we default to 8 (
BoolWidth = BoolAlign = 8; |
I think we may need to use TargetInfo
for these at some point so we're matching the target when constant evaluating on the host. But I think that can happen later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ctx.getTargetInfo().getBoolWidth()
should do the trick
constexpr static unsigned valueReprBytes(const ASTContext &Ctx) { return 1; } | ||
constexpr static unsigned valueReprBits(const ASTContext &Ctx) { | ||
return 8; | ||
} // FIXME: Is this correct? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ctx.getTargetInfo().getBoolWidth()
should do the trick
@@ -134,6 +141,10 @@ class Floating final { | |||
|
|||
return Floating(APFloat(Sem, API)); | |||
} | |||
void bitcastToMemory(std::byte *Buff) { | |||
llvm::APInt API = F.bitcastToAPInt(); | |||
llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should that use getCharWidth()
?
TYPE_SWITCH_CASE(PT_Uint64, B) \ | ||
TYPE_SWITCH_CASE(PT_Bool, B) \ | ||
default: \ | ||
llvm_unreachable("Unhandled bitcast type"); \ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd rather see a diagnostic here (such diag exist in the current interpreter https://compiler-explorer.com/z/xva6cG6rq )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably not the right place here? Would fit into CheckBitCastType
instead. The primtypes should all be supported.
✅ With the latest revision this PR passed the C/C++ code formatter. |
} | ||
} | ||
|
||
assert(!P.getField()->isBitField()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is still missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your work on this! I enjoyed getting a chance to learn more about Interp
, and though I'm not super familiar with C++ (the language or compiler internals), there definitely seems to be more than a few lessons learned from the ExprConstant
evaluator in here. Kudos!
I did learn a lot about implementing bit-casts as part of #74775 , especially as they relate to bit-fields and endian-ness, and I tried to relay some of those lessons here. I'd recommend checking out the updated tests, I went into some detail describing the behavior of bit-precise types, booleans, and bit-fields (as well as the intersection between all three).
The latest tests covering booleans & bit fields from my branch are at: https://github.com/llvm/llvm-project/blob/2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
My system's clang
(especially clangd
) doesn't particularly love the _BitInt(X)
stuff, so I split that out into its own file: https://github.com/llvm/llvm-project/blob/2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc/clang/test/SemaCXX/constexpr-builtin-bit-cast-bitint.cpp
I hope that's helpful!
llvm::BitVector Initialized; | ||
llvm::BitVector Data; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried llvm::BitVector
to work for me in #74775 , but it was a little awkward; the operations that I ended up wanting from the buffer's backing store for (cross-endian) bit fields were:
getBits(src, [start, end])
, which looks very much like whatgetBytes
here would need (at a byte granularity)copyBitsFrom(dst, [dstStart, dstEnd], src, ...)
, i.e.pushData
's bread and butterclearBits(dst, [start, end])
, this started off beingsetBits
; it's basically the same underlying op formarkUninitializedUntil
I ended up going with APInt
as the alternate spelling for BitVector
because it exposed those operations more or less directly (as extractBits
, insertBits
, and ~setBits
respectively), and it meant two fewer copies to get data into & out of the buffer—especially after extending copyBitsFrom
to take a [srcStart, srcEnd]
range.
Plus, it ended up being very helpful to use the bit-twiddling operators (&
/|
/~
/ <<
, etc.) at least in the prototype/debugging stages to get my head around what was going on, but more on that later.
The main difference that I saw (besides APInt supporting far more operations) was that BitVector
s were grow-able, but I'm not sure that helps here: we should know the size of the object at BitTracker-initialization time, right?
Here, I think another difference is that the end "pointer" of the BitVector is keeping track of where to write to next; I like that, though it'll take some care to make sure it works with cross-endian bit-fields I think. Either way, BitTracker
could also offer that simply by tracking its own bit-sized end pointer.
Bits.markUninitializedUntil(BuffSize * 8); | ||
assert(Bits.size() == BuffSize * 8); | ||
|
||
HasIndeterminateBits = !Bits.allInitialized(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be overly restrictive; I believe it'll disallow cases where we're bit-casting e.g. struct padding bits to primitive padding bits, right?
if (BigEndian) | ||
swapBytes(Buff.data(), BitWidth / 8); | ||
Bits.pushData(Buff.data(), BitOffset, BitWidth); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is going to mishandle cross-endian bit-fields. Consider:
struct Q {
uint16_t q : 15
}
When the host & target endianness match, the mask for writing to q
ought to be 0x7fff
(little endian) or 0xfffe
(big endian), both of which are contiguous runs and so neatly represented by a start + a length (as here). But, after byte-swapping, the write operation looks more like:
*(base + offsetof(Q, q)) = swapped & 0xff7f; // or 0xfeff
Which means the copy is no longer a contiguous run of bits. I ended up drawing a bunch of ASCII diagrams to try and sort it out, e.g.
// below, `x` is an invalid bit, and `p` is a padding bit
// the addressable unit is assumed to be a byte (8 bits)
// higher memory addresses are "down"
// bytes are drawn MSBit -> LSBit (bit labels at the top)
// the byte offset is on the left, and bit offset is on the right
//
// little-endian big-endian
//
// APInt(15, 0b+...-)
//
// 7 0 7 0
// 0 [.......-] 0 0 [x+......] 0
// 1 [x+......] 8 1 [.......-] 8
//
// APInt(10, 0b+...-)
//
// 7 0 7 0
// 0 [.......-] 0 0 [xxxxxx+.] 0
// 1 [xxxxxx+.] 8 1 [.......-] 8
//
// struct { uint16_t f : 15 }
//
// 7 0 7 0
// 0 [......-p] 0 0 [p+......] 0
// 1 [xxxxxx+.] 8 1 [.-xxxxxx] 8
I went through a number of iterations on #74775, but I settled on the general operation being:
llvm-project/clang/lib/AST/ExprConstant.cpp
Lines 7042 to 7062 in 2e0d3f8
void writeMasked(const uint64_t Offset, const APInt &Input, | |
const APInt &Mask) { | |
assert(Input.getBitWidth() == Mask.getBitWidth()); | |
const uint64_t BW = Input.getBitWidth(); | |
const BitSlice Dest = {Offset, BW}; | |
auto write = [&](const APInt &Input, const APInt &Mask) { | |
assert((~getBits(Invalid, Dest) & Mask) == 0 && "overwriting data?"); | |
const APInt Val = (Input & Mask) | (getBits(Data, Dest) & ~Mask); | |
const APInt Written = getBits(Invalid, Dest) ^ Mask; | |
copyBitsFrom(Data, Dest, Val, {0, BW}); | |
copyBitsFrom(Invalid, Dest, Written, {0, BW}); | |
}; | |
if (!IsNativeEndian && BW > CharBit) { | |
write(Input.byteSwap(), Mask.byteSwap()); | |
return; | |
} | |
write(Input, Mask); | |
} |
Which is a bit pricey for what I imagined the common cases to be (full-word copies, host/target have matching little-endianness, and so on): so I ended up with a pushData
-shaped method too, which is basically a big tower of asserts on top of a memcpy
(for the data) and memset
(for the validity bits).
static Boolean bitcastFromMemory(const std::byte *Buff, unsigned BitWidth) { | ||
assert(BitWidth == 8); | ||
bool Val = static_cast<bool>(*Buff); | ||
return Boolean(Val); | ||
} | ||
|
||
void bitcastToMemory(std::byte *Buff) { std::memcpy(Buff, &V, sizeof(V)); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a tradeoff to be made, here: booleans are among the primitive types that have padding bits, though I believe them to be by far the most commonly used (other contenders include long double
and _BitInt(4)
).
The wrinkle is something like this:
constexpr bool b() {
return std::bit_cast<bool, uint8_t>(0x02);
}
int main() {
if constexpr (b() == 0)
return b();
return 1;
}
For that sample, gcc
produces a binary that exits with code 2: https://compiler-explorer.com/z/jes71o85h , and current clang refuses to compile with a note: value 2 cannot be represented in type 'bool'
. To preserve that behavior, I think that means here bitcastToMemory
ought to be a fail-able operation that checks all bits above the LSB are zero.
There's another question about whether it's worth giving bool
special treatment: should only Boolean::bitcastToMemory
be a fail-able operation? In other words, given
constexpr auto a = std::bit_cast<uint8_t>(false);
constexpr auto b = std::bit_cast<__int128_t>((long double)0);
constexpr auto c = std::bit_cast<bool>('\x02');
constexpr auto d = std::bit_cast<long double>((__int128_t)~0);
Right now, a
& d
succeed. b
& c
fail to produce constant values for different reasons: c
because the evaluator sees that it'd lose information by allowing the bit-cast, and b
because it doesn't want to check.
I see two fully consistent interpretations, here (considering both implementation & semantics):
a
fails for the same reason asb
, and thenInterp
is free to allowc
&d
as a simple memcpy + mask (to throw away any padding bits)a
andb
both succeed, but thenc
andd
must fail for the same reason.
Both are backwards-incompatible changes, though: code that used to compile with the old constant interpreter would fail under the new one. The third choice preserves backwards compatibility by holding bool
out as the only type whose values get checked for represent-ability. (NB: that's a link to the code from #74775 (the current implementation is subtly different, but the behavior is the same because the current evaluator doesn't [fully] support _BitInt(X)
)
size_t BitsHandled = 0; | ||
// Read all full bytes first | ||
for (size_t I = 0; I != BitWidth / 8; ++I) { | ||
for (unsigned X = 0; X != 8; ++X) { | ||
Data.push_back((data[I] & std::byte(1 << X)) != std::byte{0}); | ||
Initialized.push_back(true); | ||
++BitsHandled; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll defer to your expertise as to whether this is appropriate, here, but it does seem a little unfortunate to do a bit-by-bit copy. I think the in-band type information means we're more or less stuck doing things field-by-field (because Interp
will lay out a struct { char c[3]; };
very differently from a struct { char c; short s; }
?), as with the existing ExprConstant interpreter, but my intuition says that I ought to expect a uint64_t
field to take roughly 8x as long to copy as a char
with this implementation, despite both of them fitting within a machine word on my 64-bit system. Does my thinking match yours, here?
This is a subset of #68288, with hopefully narrower scope. It does not support bitcasting to non-integral types yet. Bitfields are supported, but only if they result in a full byte-sized final buffer. It does not support casting from null-pointers yet or casting from indeterminate bits. The tests are from #68288 and partially from #74775. The `BitcastBuffer` struct is currently always working in single bits, but I plan to (try to) optimize this for the common full-byte case.
This is a subset of llvm#68288, with hopefully narrower scope. It does not support bitcasting to non-integral types yet. Bitfields are supported, but only if they result in a full byte-sized final buffer. It does not support casting from null-pointers yet or casting from indeterminate bits. The tests are from llvm#68288 and partially from llvm#74775. The `BitcastBuffer` struct is currently always working in single bits, but I plan to (try to) optimize this for the common full-byte case.
This is a subset of llvm#68288, with hopefully narrower scope. It does not support bitcasting to non-integral types yet. Bitfields are supported, but only if they result in a full byte-sized final buffer. It does not support casting from null-pointers yet or casting from indeterminate bits. The tests are from llvm#68288 and partially from llvm#74775. The `BitcastBuffer` struct is currently always working in single bits, but I plan to (try to) optimize this for the common full-byte case.
This has been implemented via a series of other PRs now. |
From https://reviews.llvm.org/D154951 - which is never gonna make it out of Phabricator anyway.
The tests are failing because https://reviews.llvm.org/D154581 is not pushed.
Aaron's review mentioned that we should support bitfields before pushing this.