[clang][Interp] Implement __builtin_bit_cast #68288

tbaederr · 2023-10-05T08:12:15Z

From https://reviews.llvm.org/D154951 - which is never gonna make it out of Phabricator anyway.

The tests are failing because https://reviews.llvm.org/D154581 is not pushed.

Aaron's review mentioned that we should support bitfields before pushing this.

llvmbot · 2023-10-05T08:13:26Z

@llvm/pr-subscribers-clang

Changes

From https://reviews.llvm.org/D154951 - which is never gonna make it out of Phabricator anyway.

The tests are failing because https://reviews.llvm.org/D154581 is not pushed.

Aaron's review mentioned that we should support bitfields before pushing this.

Patch is 56.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68288.diff

13 Files Affected:

(modified) clang/lib/AST/CMakeLists.txt (+1)
(modified) clang/lib/AST/Interp/Boolean.h (+13-2)
(modified) clang/lib/AST/Interp/ByteCodeExprGen.cpp (+71)
(modified) clang/lib/AST/Interp/ByteCodeExprGen.h (+1)
(modified) clang/lib/AST/Interp/Floating.h (+11)
(modified) clang/lib/AST/Interp/Integral.h (+18-1)
(modified) clang/lib/AST/Interp/Interp.cpp (+17)
(modified) clang/lib/AST/Interp/Interp.h (+66)
(added) clang/lib/AST/Interp/InterpBitcast.cpp (+482)
(modified) clang/lib/AST/Interp/Opcodes.td (+17)
(modified) clang/lib/AST/Interp/PrimType.h (+4)
(added) clang/test/AST/Interp/builtin-bit-cast.cpp (+683)
(modified) clang/test/AST/Interp/literals.cpp (+5)

diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index fe3f8c485ec1c56..5a188b583022bda 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -74,6 +74,7 @@ add_clang_library(clangAST
   Interp/Frame.cpp
   Interp/Function.cpp
   Interp/InterpBuiltin.cpp
+  Interp/InterpBitcast.cpp
   Interp/Floating.cpp
   Interp/Interp.cpp
   Interp/InterpBlock.cpp
diff --git a/clang/lib/AST/Interp/Boolean.h b/clang/lib/AST/Interp/Boolean.h
index c3ed3d61f76ca1c..d395264ab2de448 100644
--- a/clang/lib/AST/Interp/Boolean.h
+++ b/clang/lib/AST/Interp/Boolean.h
@@ -9,14 +9,15 @@
 #ifndef LLVM_CLANG_AST_INTERP_BOOLEAN_H
 #define LLVM_CLANG_AST_INTERP_BOOLEAN_H
 
-#include <cstddef>
-#include <cstdint>
 #include "Integral.h"
 #include "clang/AST/APValue.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/ComparisonCategories.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
 
 namespace clang {
 namespace interp {
@@ -65,6 +66,9 @@ class Boolean final {
   Boolean toUnsigned() const { return *this; }
 
   constexpr static unsigned bitWidth() { return 1; }
+  constexpr static unsigned objectReprBits() { return 8; }
+  constexpr static unsigned valueReprBytes(const ASTContext &Ctx) { return 1; }
+
   bool isZero() const { return !V; }
   bool isMin() const { return isZero(); }
 
@@ -106,6 +110,13 @@ class Boolean final {
     return Boolean(!Value.isZero());
   }
 
+  static Boolean bitcastFromMemory(const std::byte *Buff) {
+    bool Val = static_cast<bool>(*Buff);
+    return Boolean(Val);
+  }
+
+  void bitcastToMemory(std::byte *Buff) { std::memcpy(Buff, &V, sizeof(V)); }
+
   static Boolean zero() { return from(false); }
 
   template <typename T>
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 3cf8bf874b1d210..8cc010d6f100ccb 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -67,6 +67,74 @@ template <class Emitter> class OptionScope final {
 } // namespace interp
 } // namespace clang
 
+//  This function is constexpr if and only if To, From, and the types of
+//  all subobjects of To and From are types T such that...
+//  (3.1) - is_union_v<T> is false;
+//  (3.2) - is_pointer_v<T> is false;
+//  (3.3) - is_member_pointer_v<T> is false;
+//  (3.4) - is_volatile_v<T> is false; and
+//  (3.5) - T has no non-static data members of reference type
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
+  const Expr *SubExpr = E->getSubExpr();
+  QualType FromType = SubExpr->getType();
+  QualType ToType = E->getType();
+  std::optional<PrimType> ToT = classify(ToType);
+
+  // FIXME: This is wrong. We need to do the bitcast and then
+  //   throw away the result, so we still get the diagnostics.
+  if (DiscardResult)
+    return this->discard(SubExpr);
+
+  if (ToType->isNullPtrType()) {
+    if (!this->discard(SubExpr))
+      return false;
+
+    return this->emitNullPtr(E);
+  }
+
+  if (FromType->isNullPtrType() && ToT) {
+    if (!this->discard(SubExpr))
+      return false;
+
+    return visitZeroInitializer(ToType, E);
+  }
+  assert(!ToType->isReferenceType());
+
+  // Get a pointer to the value-to-cast on the stack.
+  if (!this->visit(SubExpr))
+    return false;
+
+  if (!ToT || ToT == PT_Ptr) {
+    // Conversion to an array or record type.
+    return this->emitBitCastPtr(E);
+  }
+
+  assert(ToT);
+
+  // Conversion to a primitive type. FromType can be another
+  // primitive type, or a record/array.
+  //
+  // Same thing for floats, but we need the target
+  // semantics here.
+  if (ToT == PT_Float) {
+    const auto *TargetSemantics = &Ctx.getFloatSemantics(ToType);
+    CharUnits FloatSize = Ctx.getASTContext().getTypeSizeInChars(ToType);
+    return this->emitBitCastFP(TargetSemantics, FloatSize.getQuantity(), E);
+  }
+
+  bool ToTypeIsUChar = (ToType->isSpecificBuiltinType(BuiltinType::UChar) ||
+                        ToType->isSpecificBuiltinType(BuiltinType::Char_U));
+
+  if (!this->emitBitCast(*ToT, ToTypeIsUChar || ToType->isStdByteType(), E))
+    return false;
+
+  if (DiscardResult)
+    return this->emitPop(*ToT, E);
+
+  return true;
+}
+
 template <class Emitter>
 bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
   auto *SubExpr = CE->getSubExpr();
@@ -87,6 +155,9 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
         });
   }
 
+  case CK_LValueToRValueBitCast:
+    return this->emitBuiltinBitCast(CE);
+
   case CK_UncheckedDerivedToBase:
   case CK_DerivedToBase: {
     if (!this->visit(SubExpr))
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 47a3f75f13459d0..8f04c4809ccf41d 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -280,6 +280,7 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
   bool emitRecordDestruction(const Descriptor *Desc);
   unsigned collectBaseOffset(const RecordType *BaseType,
                              const RecordType *DerivedType);
+  bool emitBuiltinBitCast(const CastExpr *E);
 
 protected:
   /// Variable to storage mapping.
diff --git a/clang/lib/AST/Interp/Floating.h b/clang/lib/AST/Interp/Floating.h
index a22b3fa79f3992f..727d37520047b77 100644
--- a/clang/lib/AST/Interp/Floating.h
+++ b/clang/lib/AST/Interp/Floating.h
@@ -15,6 +15,7 @@
 
 #include "Primitives.h"
 #include "clang/AST/APValue.h"
+#include "clang/AST/ASTContext.h"
 #include "llvm/ADT/APFloat.h"
 
 namespace clang {
@@ -84,6 +85,12 @@ class Floating final {
   }
 
   unsigned bitWidth() const { return F.semanticsSizeInBits(F.getSemantics()); }
+  unsigned objectReprBits() { return F.semanticsSizeInBits(F.getSemantics()); }
+
+  unsigned valueReprBytes(const ASTContext &Ctx) {
+    return Ctx.toCharUnitsFromBits(F.semanticsSizeInBits(F.getSemantics()))
+        .getQuantity();
+  }
 
   bool isSigned() const { return true; }
   bool isNegative() const { return F.isNegative(); }
@@ -133,6 +140,10 @@ class Floating final {
 
     return Floating(APFloat(Sem, API));
   }
+  void bitcastToMemory(std::byte *Buff) {
+    llvm::APInt API = F.bitcastToAPInt();
+    llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8);
+  }
 
   // === Serialization support ===
   size_t bytesToSerialize() const {
diff --git a/clang/lib/AST/Interp/Integral.h b/clang/lib/AST/Interp/Integral.h
index 4dbe9c9bcb14b43..9c78bb04876ea02 100644
--- a/clang/lib/AST/Interp/Integral.h
+++ b/clang/lib/AST/Interp/Integral.h
@@ -13,8 +13,9 @@
 #ifndef LLVM_CLANG_AST_INTERP_INTEGRAL_H
 #define LLVM_CLANG_AST_INTERP_INTEGRAL_H
 
-#include "clang/AST/ComparisonCategories.h"
 #include "clang/AST/APValue.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/ComparisonCategories.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -116,6 +117,10 @@ template <unsigned Bits, bool Signed> class Integral final {
   }
 
   constexpr static unsigned bitWidth() { return Bits; }
+  constexpr static unsigned objectReprBits() { return Bits; }
+  constexpr static unsigned valueReprBytes(const ASTContext &Ctx) {
+    return Ctx.toCharUnitsFromBits(Bits).getQuantity();
+  }
 
   bool isZero() const { return !V; }
 
@@ -182,6 +187,18 @@ template <unsigned Bits, bool Signed> class Integral final {
     return Integral(Value);
   }
 
+  static Integral bitcastFromMemory(const std::byte *Buff) {
+    ReprT V;
+
+    std::memcpy(&V, Buff, sizeof(ReprT));
+    return Integral(V);
+  }
+
+  void bitcastToMemory(std::byte *Buff) const {
+    assert(Buff);
+    std::memcpy(Buff, &V, sizeof(ReprT));
+  }
+
   static bool inRange(int64_t Value, unsigned NumBits) {
     return CheckRange<ReprT, Min, Max>(Value);
   }
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 8e851595963184c..d2c65daafc7d135 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -576,7 +576,24 @@ bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR) {
       return false;
     }
   }
+  return false;
+}
+
+bool CheckBitcast(InterpState &S, CodePtr OpPC, unsigned IndeterminateBits,
+                  bool TargetIsUCharOrByte) {
+
+  // This is always fine.
+  if (IndeterminateBits == 0)
+    return true;
+
+  // Indeterminate bits can only be bitcast to unsigned char or std::byte.
+  if (TargetIsUCharOrByte)
+    return true;
 
+  const Expr *E = S.Current->getExpr(OpPC);
+  QualType ExprType = E->getType();
+  S.FFDiag(E, diag::note_constexpr_bit_cast_indet_dest)
+      << ExprType << S.getLangOpts().CharIsSigned << E->getSourceRange();
   return false;
 }
 
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index d62e64bedb213ac..c14f890e188ecdb 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -183,6 +183,9 @@ bool CheckFloatResult(InterpState &S, CodePtr OpPC, const Floating &Result,
 /// Checks why the given DeclRefExpr is invalid.
 bool CheckDeclRef(InterpState &S, CodePtr OpPC, const DeclRefExpr *DR);
 
+bool CheckBitcast(InterpState &S, CodePtr OpPC, unsigned IndeterminateBits,
+                  bool TargetIsUCharOrByte);
+
 /// Interpreter entry point.
 bool Interpret(InterpState &S, APValue &Result);
 
@@ -194,6 +197,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
 bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
                        llvm::ArrayRef<int64_t> ArrayIndices, int64_t &Result);
 
+/// Perform a bitcast of all fields of P into Buff. This performs the
+/// actions of a __builtin_bit_cast expression when the target type
+/// is primitive.
+bool DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &P, std::byte *Buff,
+               size_t BuffSize, unsigned &IndeterminateBits);
+
+/// Perform a bitcast of all fields of P into the fields of DestPtr.
+/// This performs the actions of a __builtin_bit_cast expression when
+/// the target type is a composite type.
+bool DoBitCastToPtr(InterpState &S, const Pointer &P, Pointer &DestPtr,
+                    CodePtr PC);
+
 enum class ArithOp { Add, Sub };
 
 //===----------------------------------------------------------------------===//
@@ -1557,6 +1572,57 @@ template <PrimType TIn, PrimType TOut> bool Cast(InterpState &S, CodePtr OpPC) {
   return true;
 }
 
+template <PrimType Name, class ToT = typename PrimConv<Name>::T>
+bool BitCast(InterpState &S, CodePtr OpPC, bool TargetIsUCharOrByte) {
+  const Pointer &FromPtr = S.Stk.pop<Pointer>();
+
+  size_t BuffSize = ToT::valueReprBytes(S.getCtx());
+  std::vector<std::byte> Buff(BuffSize);
+  unsigned IndeterminateBits = 0;
+
+  if (!DoBitCast(S, OpPC, FromPtr, Buff.data(), BuffSize, IndeterminateBits))
+    return false;
+
+  if (!CheckBitcast(S, OpPC, IndeterminateBits, TargetIsUCharOrByte))
+    return false;
+
+  S.Stk.push<ToT>(ToT::bitcastFromMemory(Buff.data()));
+  return true;
+}
+
+/// Bitcast TO a float.
+inline bool BitCastFP(InterpState &S, CodePtr OpPC,
+                      const llvm::fltSemantics *Sem, uint32_t TargetSize) {
+  const Pointer &FromPtr = S.Stk.pop<Pointer>();
+
+  std::vector<std::byte> Buff(TargetSize);
+  unsigned IndeterminateBits = 0;
+
+  if (!DoBitCast(S, OpPC, FromPtr, Buff.data(), TargetSize, IndeterminateBits))
+    return false;
+
+  if (!CheckBitcast(S, OpPC, IndeterminateBits, /*TargetIsUCharOrByte=*/false))
+    return false;
+
+  S.Stk.push<Floating>(Floating::bitcastFromMemory(Buff.data(), *Sem));
+  return true;
+}
+
+/// 1) Pops a pointer from the stack
+/// 2) Peeks a pointer
+/// 3) Bitcasts the contents of the first pointer to the
+///    fields of the second pointer.
+inline bool BitCastPtr(InterpState &S, CodePtr OpPC) {
+  const Pointer &FromPtr = S.Stk.pop<Pointer>();
+  Pointer &ToPtr = S.Stk.peek<Pointer>();
+
+  // FIXME: We should CheckLoad() for FromPtr and ToPtr here, I think.
+  if (!DoBitCastToPtr(S, FromPtr, ToPtr, OpPC))
+    return false;
+
+  return true;
+}
+
 /// 1) Pops a Floating from the stack.
 /// 2) Pushes a new floating on the stack that uses the given semantics.
 inline bool CastFP(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem,
diff --git a/clang/lib/AST/Interp/InterpBitcast.cpp b/clang/lib/AST/Interp/InterpBitcast.cpp
new file mode 100644
index 000000000000000..91326fc8e788080
--- /dev/null
+++ b/clang/lib/AST/Interp/InterpBitcast.cpp
@@ -0,0 +1,482 @@
+//===--- InterpBitcast.cpp - Interpreter for the constexpr VM ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "Boolean.h"
+#include "Interp.h"
+#include "PrimType.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetInfo.h"
+
+namespace clang {
+namespace interp {
+
+// TODO: Try to e-duplicate the primitive and composite versions.
+
+/// Used to iterate over pointer fields.
+using DataFunc =
+    llvm::function_ref<bool(const Pointer &P, PrimType Ty, size_t Offset)>;
+
+#define BITCAST_TYPE_SWITCH(Expr, B)                                           \
+  do {                                                                         \
+    switch (Expr) {                                                            \
+      TYPE_SWITCH_CASE(PT_Sint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Uint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Sint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Bool, B)                                             \
+    default:                                                                   \
+      llvm_unreachable("Unhandled bitcast type");                              \
+    }                                                                          \
+  } while (0)
+
+/// Float is a special case that sometimes needs the floating point semantics
+/// to be available.
+#define BITCAST_TYPE_SWITCH_WITH_FLOAT(Expr, B)                                \
+  do {                                                                         \
+    switch (Expr) {                                                            \
+      TYPE_SWITCH_CASE(PT_Sint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Uint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Sint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Bool, B)                                             \
+      TYPE_SWITCH_CASE(PT_Float, B)                                            \
+    default:                                                                   \
+      llvm_unreachable("Unhandled bitcast type");                              \
+    }                                                                          \
+  } while (0)
+
+/// Rotate things around for big endian targets.
+static void swapBytes(std::byte *M, size_t N) {
+  for (size_t I = 0; I != (N / 2); ++I)
+    std::swap(M[I], M[N - 1 - I]);
+}
+
+/// Track what bytes have been initialized to known values and which ones
+/// have indeterminate value.
+/// All offsets are in bytes.
+struct ByteTracker {
+  std::vector<bool> Initialized;
+  std::vector<std::byte> Data;
+
+  ByteTracker() = default;
+
+  size_t size() const {
+    assert(Initialized.size() == Data.size());
+    return Initialized.size();
+  }
+
+  std::byte *getBytes(size_t Offset) { return Data.data() + Offset; }
+  bool allInitialized(size_t Offset, size_t Size) const {
+    for (size_t I = Offset; I != (Size + Offset); ++I) {
+      if (!Initialized[I])
+        return false;
+    }
+    return true;
+  }
+
+  std::byte *getWritableBytes(size_t Offset, size_t Size, bool InitValue) {
+    assert(Offset >= Data.size());
+    assert(Size > 0);
+
+    size_t OldSize = Data.size();
+    Data.resize(Offset + Size);
+
+    // Everything from the old size to the new offset is indeterminate.
+    for (size_t I = OldSize; I != Offset; ++I)
+      Initialized.push_back(false);
+    for (size_t I = Offset; I != Offset + Size; ++I)
+      Initialized.push_back(InitValue);
+
+    return Data.data() + Offset;
+  }
+
+  void markUninitializedUntil(size_t Offset) {
+    assert(Offset >= Data.size());
+
+    size_t NBytes = Offset - Data.size();
+    for (size_t I = 0; I != NBytes; ++I)
+      Initialized.push_back(false);
+    Data.resize(Offset);
+  }
+
+  void zeroUntil(size_t Offset) {
+    assert(Offset >= Data.size());
+
+    assert(Data.size() == Initialized.size());
+    size_t NBytes = Offset - Data.size();
+    for (size_t I = 0; I != NBytes; ++I) {
+      Initialized.push_back(true);
+      Data.push_back(std::byte{0});
+    }
+  }
+};
+
+struct BitcastBuffer {
+  std::byte *Buff;
+  size_t ByteOffset = 0;
+  size_t Offset = 0;
+  size_t BuffSize;
+  unsigned IndeterminateBits = 0;
+  bool BigEndian;
+
+  constexpr BitcastBuffer(std::byte *Buff, size_t BuffSize, bool BigEndian)
+      : Buff(Buff), BuffSize(BuffSize), BigEndian(BigEndian) {}
+
+  std::byte *getBytes(size_t ByteOffset, size_t N) {
+    assert(ByteOffset >= this->ByteOffset && "we don't support stepping back");
+
+    // All untouched bits before the requested bit offset
+    // are indeterminate values. This will be important later,
+    // because they can't be read into non-uchar/non-std::byte
+    // values.
+    IndeterminateBits += (ByteOffset - this->ByteOffset);
+
+    size_t OldOffset = this->Offset;
+
+    this->Offset += N;
+    this->ByteOffset = ByteOffset + N;
+
+    if (BigEndian)
+      return Buff + BuffSize - OldOffset - N;
+
+    // Little Endian target.
+    return Buff + OldOffset;
+  }
+};
+
+/// We use this to recursively iterate over all fields and elemends of a pointer
+/// and extract relevant data for a bitcast.
+static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset,
+                          DataFunc F) {
+  const Descriptor *FieldDesc = P.getFieldDesc();
+  assert(FieldDesc);
+
+  // Primitives.
+  if (FieldDesc->isPrimitive())
+    return F(P, *Ctx.classify(FieldDesc->getType()), Offset);
+
+  // Primitive arrays.
+  if (FieldDesc->isPrimitiveArray()) {
+    QualType ElemType =
+        FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType();
+    size_t ElemSize =
+        Ctx.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
+    PrimType ElemT = *Ctx.classify(ElemType);
+    for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) {
+      if (!F(P.atIndex(I), ElemT, Offset))
+        return false;
+      Offset += ElemSize;
+    }
+    return true;
+  }
+
+  // Composite arrays.
+  if (FieldDesc->isCompositeArray()) {
+    QualType ElemType =
+        FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType();
+    size_t ElemSize =
+        Ctx.getASTContext().getTypeSizeInChars(ElemType).getQuantity();
+    for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) {
+      enumerateData(P.atIndex(I).narrow(), Ctx, Offset, F);
+      Offset += ElemSize;
+    }
+    return true;
+  }
+
+  // Records.
+  if (FieldDesc->isRecord()) {
+    const Record *R = FieldDesc->ElemRecord;
+    const ...
[truncated]

tbaederr · 2023-10-13T08:39:54Z

Ping

tbaederr · 2023-10-18T10:20:18Z

@AaronBallman This is still in a rough WIP state, but I've added support for bitcasts with bitfields involved, take a look at the tests if you have some time.

AaronBallman · 2023-10-26T17:13:38Z

clang/lib/AST/Interp/Boolean.h

+  constexpr static unsigned valueReprBytes(const ASTContext &Ctx) { return 1; }
+  constexpr static unsigned valueReprBits(const ASTContext &Ctx) {
+    return 8;
+  } // FIXME: Is this correct?


https://eel.is/c++draft/basic#fundamental-10 -- so it's implementation-defined how many bits are in the value representation; we default to 8 (

llvm-project/clang/lib/Basic/TargetInfo.cpp

Line 72 in 5975002

BoolWidth = BoolAlign = 8;

) and I didn't see any targets that use a different value.

I think we may need to use TargetInfo for these at some point so we're matching the target when constant evaluating on the host. But I think that can happen later.

Ctx.getTargetInfo().getBoolWidth() should do the trick

clang/lib/AST/Interp/InterpBitcast.cpp

clang/test/AST/Interp/builtin-bit-cast.cpp

clang/lib/AST/Interp/Boolean.h

cor3ntin · 2023-11-24T10:36:19Z

clang/lib/AST/Interp/Boolean.h

+  constexpr static unsigned valueReprBytes(const ASTContext &Ctx) { return 1; }
+  constexpr static unsigned valueReprBits(const ASTContext &Ctx) {
+    return 8;
+  } // FIXME: Is this correct?


Ctx.getTargetInfo().getBoolWidth() should do the trick

cor3ntin · 2023-11-24T10:42:17Z

clang/lib/AST/Interp/Floating.h

@@ -134,6 +141,10 @@ class Floating final {

    return Floating(APFloat(Sem, API));
  }
+  void bitcastToMemory(std::byte *Buff) {
+    llvm::APInt API = F.bitcastToAPInt();
+    llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8);


Should that use getCharWidth() ?

clang/lib/AST/Interp/Interp.h

clang/lib/AST/Interp/ByteCodeExprGen.cpp

cor3ntin · 2023-11-24T10:53:10Z

clang/lib/AST/Interp/InterpBitcast.cpp

+      TYPE_SWITCH_CASE(PT_Uint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Bool, B)                                             \
+    default:                                                                   \
+      llvm_unreachable("Unhandled bitcast type");                              \


I'd rather see a diagnostic here (such diag exist in the current interpreter https://compiler-explorer.com/z/xva6cG6rq )

Probably not the right place here? Would fit into CheckBitCastType instead. The primtypes should all be supported.

clang/lib/AST/Interp/InterpBitcast.cpp

github-actions · 2023-11-27T08:58:06Z

✅ With the latest revision this PR passed the C/C++ code formatter.

tbaederr · 2023-11-27T16:10:44Z

clang/lib/AST/Interp/InterpBitcast.cpp

+            }
+          }
+
+          assert(!P.getField()->isBitField());


This is still missing.

sethp

Thanks for your work on this! I enjoyed getting a chance to learn more about Interp, and though I'm not super familiar with C++ (the language or compiler internals), there definitely seems to be more than a few lessons learned from the ExprConstant evaluator in here. Kudos!

I did learn a lot about implementing bit-casts as part of #74775 , especially as they relate to bit-fields and endian-ness, and I tried to relay some of those lessons here. I'd recommend checking out the updated tests, I went into some detail describing the behavior of bit-precise types, booleans, and bit-fields (as well as the intersection between all three).

The latest tests covering booleans & bit fields from my branch are at: https://github.com/llvm/llvm-project/blob/2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp

My system's clang (especially clangd) doesn't particularly love the _BitInt(X) stuff, so I split that out into its own file: https://github.com/llvm/llvm-project/blob/2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc/clang/test/SemaCXX/constexpr-builtin-bit-cast-bitint.cpp

I hope that's helpful!

sethp · 2024-02-05T14:50:45Z

clang/lib/AST/Interp/InterpBitcast.cpp

+  llvm::BitVector Initialized;
+  llvm::BitVector Data;


I tried llvm::BitVector to work for me in #74775 , but it was a little awkward; the operations that I ended up wanting from the buffer's backing store for (cross-endian) bit fields were:

getBits(src, [start, end]) , which looks very much like what getBytes here would need (at a byte granularity)

copyBitsFrom(dst, [dstStart, dstEnd], src, ...), i.e. pushData's bread and butter

clearBits(dst, [start, end]), this started off being setBits; it's basically the same underlying op for markUninitializedUntil

I ended up going with APInt as the alternate spelling for BitVector because it exposed those operations more or less directly (as extractBits, insertBits, and ~setBits respectively), and it meant two fewer copies to get data into & out of the buffer—especially after extending copyBitsFrom to take a [srcStart, srcEnd] range.

Plus, it ended up being very helpful to use the bit-twiddling operators (&/|/~/ <<, etc.) at least in the prototype/debugging stages to get my head around what was going on, but more on that later.

The main difference that I saw (besides APInt supporting far more operations) was that BitVectors were grow-able, but I'm not sure that helps here: we should know the size of the object at BitTracker-initialization time, right?

Here, I think another difference is that the end "pointer" of the BitVector is keeping track of where to write to next; I like that, though it'll take some care to make sure it works with cross-endian bit-fields I think. Either way, BitTracker could also offer that simply by tracking its own bit-sized end pointer.

sethp · 2024-02-05T15:40:37Z

clang/lib/AST/Interp/InterpBitcast.cpp

+  Bits.markUninitializedUntil(BuffSize * 8);
+  assert(Bits.size() == BuffSize * 8);
+
+  HasIndeterminateBits = !Bits.allInitialized();


This might be overly restrictive; I believe it'll disallow cases where we're bit-casting e.g. struct padding bits to primitive padding bits, right?

sethp · 2024-02-05T16:20:06Z

clang/lib/AST/Interp/InterpBitcast.cpp

+          if (BigEndian)
+            swapBytes(Buff.data(), BitWidth / 8);
+          Bits.pushData(Buff.data(), BitOffset, BitWidth);


I think this is going to mishandle cross-endian bit-fields. Consider:

struct Q { uint16_t q : 15 }

When the host & target endianness match, the mask for writing to q ought to be 0x7fff (little endian) or 0xfffe (big endian), both of which are contiguous runs and so neatly represented by a start + a length (as here). But, after byte-swapping, the write operation looks more like:

*(base + offsetof(Q, q)) = swapped & 0xff7f; // or 0xfeff

Which means the copy is no longer a contiguous run of bits. I ended up drawing a bunch of ASCII diagrams to try and sort it out, e.g.

// below, `x` is an invalid bit, and `p` is a padding bit // the addressable unit is assumed to be a byte (8 bits) // higher memory addresses are "down" // bytes are drawn MSBit -> LSBit (bit labels at the top) // the byte offset is on the left, and bit offset is on the right // // little-endian big-endian // // APInt(15, 0b+...-) // // 7 0 7 0 // 0 [.......-] 0 0 [x+......] 0 // 1 [x+......] 8 1 [.......-] 8 // // APInt(10, 0b+...-) // // 7 0 7 0 // 0 [.......-] 0 0 [xxxxxx+.] 0 // 1 [xxxxxx+.] 8 1 [.......-] 8 // // struct { uint16_t f : 15 } // // 7 0 7 0 // 0 [......-p] 0 0 [p+......] 0 // 1 [xxxxxx+.] 8 1 [.-xxxxxx] 8

I went through a number of iterations on #74775, but I settled on the general operation being:

llvm-project/clang/lib/AST/ExprConstant.cpp

Lines 7042 to 7062 in 2e0d3f8

void writeMasked(const uint64_t Offset, const APInt &Input,

const APInt &Mask) {

assert(Input.getBitWidth() == Mask.getBitWidth());

const uint64_t BW = Input.getBitWidth();

const BitSlice Dest = {Offset, BW};

auto write = [&](const APInt &Input, const APInt &Mask) {

assert((~getBits(Invalid, Dest) & Mask) == 0 && "overwriting data?");

const APInt Val = (Input & Mask) | (getBits(Data, Dest) & ~Mask);

const APInt Written = getBits(Invalid, Dest) ^ Mask;

copyBitsFrom(Data, Dest, Val, {0, BW});

copyBitsFrom(Invalid, Dest, Written, {0, BW});

};

if (!IsNativeEndian && BW > CharBit) {

write(Input.byteSwap(), Mask.byteSwap());

return;

}

write(Input, Mask);

}

Which is a bit pricey for what I imagined the common cases to be (full-word copies, host/target have matching little-endianness, and so on): so I ended up with a pushData-shaped method too, which is basically a big tower of asserts on top of a memcpy (for the data) and memset (for the validity bits).

sethp · 2024-02-05T17:49:47Z

clang/lib/AST/Interp/Boolean.h

+  static Boolean bitcastFromMemory(const std::byte *Buff, unsigned BitWidth) {
+    assert(BitWidth == 8);
+    bool Val = static_cast<bool>(*Buff);
+    return Boolean(Val);
+  }
+
+  void bitcastToMemory(std::byte *Buff) { std::memcpy(Buff, &V, sizeof(V)); }


There's a tradeoff to be made, here: booleans are among the primitive types that have padding bits, though I believe them to be by far the most commonly used (other contenders include long double and _BitInt(4)).

The wrinkle is something like this:

constexpr bool b() { return std::bit_cast<bool, uint8_t>(0x02); } int main() { if constexpr (b() == 0) return b(); return 1; }

For that sample, gcc produces a binary that exits with code 2: https://compiler-explorer.com/z/jes71o85h , and current clang refuses to compile with a note: value 2 cannot be represented in type 'bool'. To preserve that behavior, I think that means here bitcastToMemory ought to be a fail-able operation that checks all bits above the LSB are zero.

There's another question about whether it's worth giving bool special treatment: should only Boolean::bitcastToMemory be a fail-able operation? In other words, given

constexpr auto a = std::bit_cast<uint8_t>(false); constexpr auto b = std::bit_cast<__int128_t>((long double)0); constexpr auto c = std::bit_cast<bool>('\x02'); constexpr auto d = std::bit_cast<long double>((__int128_t)~0);

Right now, a & d succeed. b & c fail to produce constant values for different reasons: c because the evaluator sees that it'd lose information by allowing the bit-cast, and b because it doesn't want to check.

I see two fully consistent interpretations, here (considering both implementation & semantics):

a fails for the same reason as b, and then Interp is free to allow c & d as a simple memcpy + mask (to throw away any padding bits)

a and b both succeed, but then c and d must fail for the same reason.

Both are backwards-incompatible changes, though: code that used to compile with the old constant interpreter would fail under the new one. The third choice preserves backwards compatibility by holding bool out as the only type whose values get checked for represent-ability. (NB: that's a link to the code from #74775 (the current implementation is subtly different, but the behavior is the same because the current evaluator doesn't [fully] support _BitInt(X))

sethp · 2024-02-05T18:24:52Z

clang/lib/AST/Interp/InterpBitcast.cpp

+    size_t BitsHandled = 0;
+    // Read all full bytes first
+    for (size_t I = 0; I != BitWidth / 8; ++I) {
+      for (unsigned X = 0; X != 8; ++X) {
+        Data.push_back((data[I] & std::byte(1 << X)) != std::byte{0});
+        Initialized.push_back(true);
+        ++BitsHandled;
+      }
+    }


I'll defer to your expertise as to whether this is appropriate, here, but it does seem a little unfortunate to do a bit-by-bit copy. I think the in-band type information means we're more or less stuck doing things field-by-field (because Interp will lay out a struct { char c[3]; }; very differently from a struct { char c; short s; }?), as with the existing ExprConstant interpreter, but my intuition says that I ought to expect a uint64_t field to take roughly 8x as long to copy as a char with this implementation, despite both of them fitting within a machine word on my 64-bit system. Does my thinking match yours, here?

This is a subset of #68288, with hopefully narrower scope. It does not support bitcasting to non-integral types yet. Bitfields are supported, but only if they result in a full byte-sized final buffer. It does not support casting from null-pointers yet or casting from indeterminate bits. The tests are from #68288 and partially from #74775. The `BitcastBuffer` struct is currently always working in single bits, but I plan to (try to) optimize this for the common full-byte case.

This is a subset of llvm#68288, with hopefully narrower scope. It does not support bitcasting to non-integral types yet. Bitfields are supported, but only if they result in a full byte-sized final buffer. It does not support casting from null-pointers yet or casting from indeterminate bits. The tests are from llvm#68288 and partially from llvm#74775. The `BitcastBuffer` struct is currently always working in single bits, but I plan to (try to) optimize this for the common full-byte case.

tbaederr · 2025-01-02T11:20:21Z

This has been implemented via a series of other PRs now.

tbaederr requested review from cor3ntin, AaronBallman, shafik and erichkeane October 5, 2023 08:12

llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Oct 5, 2023

tbaederr force-pushed the bitcast branch from 41f2e96 to ac1dd62 Compare October 17, 2023 14:19

AaronBallman reviewed Oct 26, 2023

View reviewed changes

tbaederr force-pushed the bitcast branch from f0e3a7b to 8af2a7c Compare November 9, 2023 11:46

cor3ntin reviewed Nov 24, 2023

View reviewed changes

tbaederr force-pushed the bitcast branch from 8af2a7c to 0f1c2b5 Compare November 27, 2023 08:55

tbaederr force-pushed the bitcast branch from 0f1c2b5 to 63157ba Compare November 27, 2023 09:53

tbaederr added 14 commits November 27, 2023 12:11

[clang][Interp][NFC] Remove unused include

0472490

Bitcasts

0c62a90

Implement bitfield bit casts to prim types

4feef2c

More bitfield work

7006c2e

All existing tests work.

1591e2c

Work on unifying the two implementations

c5f20a8

More bitfield tests

ee8d32d

Add string-to-int bitcast test

612d365

Address review comments

5c77f91

Support IntegralAP bit casts

afd8a49

Address other review comments

2988205

Rename a member and address more review comments.

f3a05e4

Handle BitCast and BitCastFP opcodes the same

48c0eeb

switch from malloc+free to llvm::SmallVector

2caadff

tbaederr force-pushed the bitcast branch from 921cd37 to 2caadff Compare November 27, 2023 15:46

tbaederr commented Nov 27, 2023

View reviewed changes

clang/lib/AST/Interp/InterpBitcast.cpp

}

}

assert(!P.getField()->isBitField());

Copy link

Contributor Author

tbaederr Nov 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still missing.

sethp mentioned this pull request Jan 16, 2024

[Clang][C++20] Implement constexpr std::bit_cast for bit-fields #74775

Open

sethp reviewed Feb 5, 2024

View reviewed changes

tbaederr mentioned this pull request Oct 13, 2024

[clang][bytecode] Start implementing __builtin_bit_cast #112126

Merged

tbaederr closed this Jan 2, 2025

	void writeMasked(const uint64_t Offset, const APInt &Input,
	const APInt &Mask) {
	assert(Input.getBitWidth() == Mask.getBitWidth());
	const uint64_t BW = Input.getBitWidth();
	const BitSlice Dest = {Offset, BW};

	auto write = [&](const APInt &Input, const APInt &Mask) {
	assert((~getBits(Invalid, Dest) & Mask) == 0 && "overwriting data?");
	const APInt Val = (Input & Mask) \| (getBits(Data, Dest) & ~Mask);
	const APInt Written = getBits(Invalid, Dest) ^ Mask;

	copyBitsFrom(Data, Dest, Val, {0, BW});
	copyBitsFrom(Invalid, Dest, Written, {0, BW});
	};

	if (!IsNativeEndian && BW > CharBit) {
	write(Input.byteSwap(), Mask.byteSwap());
	return;
	}
	write(Input, Mask);
	}

[clang][Interp] Implement __builtin_bit_cast #68288

[clang][Interp] Implement __builtin_bit_cast #68288

Uh oh!

Conversation

tbaederr commented Oct 5, 2023

Uh oh!

llvmbot commented Oct 5, 2023

Uh oh!

tbaederr commented Oct 13, 2023

Uh oh!

tbaederr commented Oct 18, 2023

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Nov 27, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

sethp left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

tbaederr commented Jan 2, 2025

Uh oh!

Uh oh!

github-actions bot commented Nov 27, 2023 •

edited

Loading