Skip to content

[clang][bytecode] Check primitive bit casts for indeterminate bits #118954

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 6, 2024

Conversation

tbaederr
Copy link
Contributor

@tbaederr tbaederr commented Dec 6, 2024

Record bits ranges of initialized bits and check them in allInitialized().

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Dec 6, 2024
@llvmbot
Copy link
Member

llvmbot commented Dec 6, 2024

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

Changes

Record bits ranges of initialized bits and check them in allInitialized().


Full diff: https://github.com/llvm/llvm-project/pull/118954.diff

6 Files Affected:

  • (modified) clang/lib/AST/ByteCode/BitcastBuffer.cpp (+51)
  • (modified) clang/lib/AST/ByteCode/BitcastBuffer.h (+24-7)
  • (modified) clang/lib/AST/ByteCode/Compiler.cpp (+1-6)
  • (modified) clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp (+1)
  • (modified) clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp (+21-2)
  • (modified) clang/test/AST/ByteCode/builtin-bit-cast.cpp (+6-13)
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
index 0cc97b0b6bf190..7f29c7c2db0147 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.cpp
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "BitcastBuffer.h"
+#include "llvm/ADT/STLExtras.h"
 
 using namespace clang;
 using namespace clang::interp;
@@ -60,6 +61,56 @@ BitcastBuffer::copyBits(Bits BitOffset, Bits BitWidth, Bits FullBitWidth,
   return Out;
 }
 
+bool BitcastBuffer::allInitialized() const {
+  Bits Sum;
+  for (BitRange BR : InitializedBits)
+    Sum += BR.size();
+
+  return Sum == FinalBitSize;
+}
+
+void BitcastBuffer::markInitialized(Bits Offset, Bits Length) {
+  if (Length.isZero())
+    return;
+
+  BitRange Element(Offset, Offset + Length - Bits(1));
+  if (InitializedBits.empty()) {
+    InitializedBits.push_back(Element);
+    return;
+  }
+
+  assert(InitializedBits.size() >= 1);
+  // Common case of just appending.
+  Bits End = InitializedBits.back().End;
+  if (End <= Offset) {
+    // Merge this range with the last one.
+    // In the best-case scenario, this means we only ever have
+    // one single bit range covering all bits.
+    if (End == (Offset - Bits(1))) {
+      InitializedBits.back().End = Element.End;
+      return;
+    }
+
+    // Otherwise, we can simply append.
+    InitializedBits.push_back(Element);
+  } else {
+    // Insert sorted.
+    auto It = std::upper_bound(InitializedBits.begin(), InitializedBits.end(),
+                               Element);
+    InitializedBits.insert(It, Element);
+  }
+
+#ifndef NDEBUG
+  // Ensure ranges are sorted and non-overlapping.
+  assert(llvm::is_sorted(InitializedBits));
+  for (unsigned I = 1; I != InitializedBits.size(); ++I) {
+    [[maybe_unused]] auto Prev = InitializedBits[I - 1];
+    [[maybe_unused]] auto Cur = InitializedBits[I];
+    assert(Prev.End.N < Cur.Start.N);
+  }
+#endif
+}
+
 #if 0
   template<typename T>
   static std::string hex(T t) {
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index c7b170ceb168fa..00fbdc9b85421d 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -8,6 +8,7 @@
 #ifndef LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H
 #define LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H
 
+#include "llvm/ADT/SmallVector.h"
 #include <cassert>
 #include <cstddef>
 #include <memory>
@@ -30,14 +31,20 @@ struct Bits {
   bool nonZero() const { return N != 0; }
   bool isZero() const { return N == 0; }
 
-  Bits operator-(Bits Other) { return Bits(N - Other.N); }
-  Bits operator+(Bits Other) { return Bits(N + Other.N); }
+  Bits operator-(Bits Other) const { return Bits(N - Other.N); }
+  Bits operator+(Bits Other) const { return Bits(N + Other.N); }
   Bits operator+=(size_t O) {
     N += O;
     return *this;
   }
+  Bits operator+=(Bits O) {
+    N += O.N;
+    return *this;
+  }
 
-  bool operator>=(Bits Other) { return N >= Other.N; }
+  bool operator>=(Bits Other) const { return N >= Other.N; }
+  bool operator<=(Bits Other) const { return N <= Other.N; }
+  bool operator==(Bits Other) const { return N == Other.N; }
 };
 
 /// A quantity in bytes.
@@ -48,11 +55,21 @@ struct Bytes {
   Bits toBits() const { return Bits(N * 8); }
 };
 
+struct BitRange {
+  Bits Start;
+  Bits End;
+
+  BitRange(Bits Start, Bits End) : Start(Start), End(End) {}
+  Bits size() const { return End - Start + Bits(1); }
+  bool operator<(BitRange Other) const { return Start.N < Other.Start.N; }
+};
+
 /// Track what bits have been initialized to known values and which ones
 /// have indeterminate value.
 struct BitcastBuffer {
   Bits FinalBitSize;
   std::unique_ptr<std::byte[]> Data;
+  llvm::SmallVector<BitRange> InitializedBits;
 
   BitcastBuffer(Bits FinalBitSize) : FinalBitSize(FinalBitSize) {
     assert(FinalBitSize.isFullByte());
@@ -64,10 +81,10 @@ struct BitcastBuffer {
   Bits size() const { return FinalBitSize; }
 
   /// Returns \c true if all bits in the buffer have been initialized.
-  bool allInitialized() const {
-    // FIXME: Implement.
-    return true;
-  }
+  bool allInitialized() const;
+  /// Marks the bits in the given range as initialized.
+  /// FIXME: Can we do this automatically in pushData()?
+  void markInitialized(Bits Start, Bits Length);
 
   /// Push \p BitWidth bits at \p BitOffset from \p In into the buffer.
   /// \p TargetEndianness is the endianness of the target we're compiling for.
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 900312401bbda0..7f6295e126dcfe 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -6483,6 +6483,7 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
   QualType ToType = E->getType();
   std::optional<PrimType> ToT = classify(ToType);
 
+  // Bitcasting TO nullptr_t is always fine.
   if (ToType->isNullPtrType()) {
     if (!this->discard(SubExpr))
       return false;
@@ -6490,12 +6491,6 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
     return this->emitNullPtr(0, nullptr, E);
   }
 
-  if (FromType->isNullPtrType() && ToT) {
-    if (!this->discard(SubExpr))
-      return false;
-
-    return visitZeroInitializer(*ToT, ToType, E);
-  }
   assert(!ToType->isReferenceType());
 
   // Prepare storage for the result in case we discard.
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
index 6ee3826fb3eea6..4c25a3bb132fcf 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
@@ -287,6 +287,7 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
         }
 
         Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness);
+        Buffer.markInitialized(BitOffset, BitWidth);
         return true;
       });
 }
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
index 00f465a471b0a4..e5337a57bf0fe4 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
@@ -134,11 +134,11 @@ namespace BitFields {
     enum byte : unsigned char {};
 
     constexpr BF bf = {0x3};
-    /// Requires bitcasts to composite types.
     static_assert(bit_cast<bits<2>>(bf).bits == bf.z);
     static_assert(bit_cast<unsigned char>(bf));
 
-    static_assert(__builtin_bit_cast(byte, bf));
+    static_assert(__builtin_bit_cast(byte, bf)); // expected-error {{not an integral constant expression}} \
+                                                 // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'byte' is invalid}}
 
     struct M {
       // ref-note@+1 {{subobject declared here}}
@@ -439,3 +439,22 @@ namespace Enums {
   static_assert(
     bit_cast<X>((unsigned char)0x40).direction == X::direction::right);
 }
+
+namespace IndeterminateBits {
+  struct S {
+    unsigned a : 13;
+    unsigned   : 17;
+    unsigned b : 2;
+  };
+  constexpr unsigned A = __builtin_bit_cast(unsigned, S{12, 3}); // expected-error {{must be initialized by a constant expression}} \
+                                                                 // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'unsigned int' is invalid}}
+
+
+  /// GCC refuses to compile this as soon as we access the indeterminate bits
+  /// in the static_assert. MSVC accepts it.
+  struct S2 {
+    unsigned char a : 2;
+  };
+  constexpr unsigned char B = __builtin_bit_cast(unsigned char, S2{3});
+  static_assert(B == (LITTLE_END ? 3 : 192));
+}
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
index f89eb3584bbcff..6ca8860d5fc18a 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
@@ -130,12 +130,8 @@ namespace simple {
   static_assert(check_round_trip<unsigned>((int)0x0C05FEFE));
   static_assert(round_trip<float>((int)0x0C05FEFE));
 
-
-  /// This works in GCC and in the bytecode interpreter, but the current interpreter
-  /// diagnoses it.
-  /// FIXME: Should also be rejected in the bytecode interpreter.
-  static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // ref-error {{not an integral constant expression}} \
-                                                             // ref-note {{indeterminate value can only initialize an object}}
+  static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // both-error {{not an integral constant expression}} \
+                                                             // both-note {{indeterminate value can only initialize an object}}
 
   constexpr int test_from_nullptr_pass = (__builtin_bit_cast(unsigned char[sizeof(nullptr)], nullptr), 0);
   constexpr unsigned char NPData[sizeof(nullptr)] = {1,2,3,4};
@@ -394,7 +390,6 @@ void bad_types() {
   };
   static_assert(__builtin_bit_cast(int, X{0}) == 0); // both-error {{not an integral constant expression}} \
                                                      // both-note {{bit_cast from a union type is not allowed in a constant expression}}
-#if 1
 
   struct G {
     int g;
@@ -405,19 +400,17 @@ void bad_types() {
   // both-error@+2 {{constexpr variable 'x' must be initialized by a constant expression}}
   // both-note@+1 {{bit_cast to a union type is not allowed in a constant expression}}
   constexpr X x = __builtin_bit_cast(X, G{0});
-#endif
+
   struct has_pointer {
-    int *ptr; // both-note {{invalid type 'int *' is a member of 'has_pointer'}}
+    int *ptr; // both-note 2{{invalid type 'int *' is a member of 'has_pointer'}}
   };
 
   constexpr intptr_t ptr = __builtin_bit_cast(intptr_t, has_pointer{0}); // both-error {{constexpr variable 'ptr' must be initialized by a constant expression}} \
                                                                          // both-note {{bit_cast from a pointer type is not allowed in a constant expression}}
 
-#if 0
-  // expected-error@+2 {{constexpr variable 'hptr' must be initialized by a constant expression}}
-  // expected-note@+1 {{bit_cast to a pointer type is not allowed in a constant expression}}
+  // both-error@+2 {{constexpr variable 'hptr' must be initialized by a constant expression}}
+  // both-note@+1 {{bit_cast to a pointer type is not allowed in a constant expression}}
   constexpr has_pointer hptr =  __builtin_bit_cast(has_pointer, 0ul);
-#endif
 }
 
 void test_array_fill() {

Record bits ranges of initialized bits and check them in
allInitialized().
@tbaederr tbaederr merged commit 2f9cd43 into llvm:main Dec 6, 2024
8 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants