Skip to content

[clang][bytecode] Fix various issues with multidimensional arrays #134628

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2025

Conversation

tbaederr
Copy link
Contributor

@tbaederr tbaederr commented Apr 7, 2025

This issue is very convoluted, but in essence, in the new version:

For a Pointer P that points to the root of a multidimensional, primitive array:

P.narrow() does nothing.
P.atIndex(0) points to P[0]
P.atIndex(0).atIndex(0) is the same as P.atIndex(0) (as before)
P.atIndex(0).narrow().atIndex(0) points to P[0][0]
P.atIndex(0).narrow().narrow() is the same as P.atIndex(0).narrow().

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Apr 7, 2025
@llvmbot
Copy link
Member

llvmbot commented Apr 7, 2025

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

Changes

This issue is very convoluted, but in essence, in the new version:

For a Pointer P that points to the root of a multidimensional, primitive array:

P.narrow() does nothing.
P.atIndex(0) points to P[0]
P.atIndex(0).atIndex(0) is the same as P.atIndex(0) (as before)
P.atIndex(0).narrow().atIndex(0) points to P[0][0]


Full diff: https://github.com/llvm/llvm-project/pull/134628.diff

4 Files Affected:

  • (modified) clang/lib/AST/ByteCode/Compiler.cpp (+2-1)
  • (modified) clang/lib/AST/ByteCode/Interp.h (+25-6)
  • (modified) clang/lib/AST/ByteCode/Pointer.h (+13-22)
  • (modified) clang/test/AST/ByteCode/arrays.cpp (+83-1)
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 021acbd798646..dd246f7ef74fc 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -6148,7 +6148,8 @@ bool Compiler<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
 
     if (!this->visit(SubExpr))
       return false;
-    if (classifyPrim(SubExpr) == PT_Ptr && !E->getType()->isArrayType())
+
+    if (classifyPrim(SubExpr) == PT_Ptr)
       return this->emitNarrowPtr(E);
     return true;
 
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 6fe1d4b1f95ae..ee69cea039990 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2059,8 +2059,11 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset,
   // useful thing we can do. Any other index has been diagnosed before and
   // we don't get here.
   if (Result == 0 && Ptr.isOnePastEnd()) {
-    S.Stk.push<Pointer>(Ptr.asBlockPointer().Pointee,
-                        Ptr.asBlockPointer().Base);
+    if (Ptr.getFieldDesc()->isArray())
+      S.Stk.push<Pointer>(Ptr.atIndex(0));
+    else
+      S.Stk.push<Pointer>(Ptr.asBlockPointer().Pointee,
+                          Ptr.asBlockPointer().Base);
     return true;
   }
 
@@ -2677,8 +2680,16 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) {
       return false;
   }
 
-  if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
-    return false;
+  if (Offset.isZero()) {
+    if (Ptr.getFieldDesc()->isArray() && Ptr.getIndex() == 0) {
+      S.Stk.push<Pointer>(Ptr.atIndex(0));
+    } else {
+      S.Stk.push<Pointer>(Ptr);
+    }
+  } else {
+    if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
+      return false;
+  }
 
   return NarrowPtr(S, OpPC);
 }
@@ -2693,8 +2704,16 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) {
       return false;
   }
 
-  if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
-    return false;
+  if (Offset.isZero()) {
+    if (Ptr.getFieldDesc()->isArray() && Ptr.getIndex() == 0) {
+      S.Stk.push<Pointer>(Ptr.atIndex(0));
+    } else {
+      S.Stk.push<Pointer>(Ptr);
+    }
+  } else {
+    if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
+      return false;
+  }
 
   return NarrowPtr(S, OpPC);
 }
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 988237d39fff4..64af5ed9b0a5d 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -200,37 +200,28 @@ class Pointer {
     if (isZero() || isUnknownSizeArray())
       return *this;
 
+    unsigned Base = asBlockPointer().Base;
     // Pointer to an array of base types - enter block.
-    if (asBlockPointer().Base == RootPtrMark)
+    if (Base == RootPtrMark)
       return Pointer(asBlockPointer().Pointee, sizeof(InlineDescriptor),
                      Offset == 0 ? Offset : PastEndMark);
 
     // Pointer is one past end - magic offset marks that.
     if (isOnePastEnd())
-      return Pointer(asBlockPointer().Pointee, asBlockPointer().Base,
-                     PastEndMark);
-
-    // Primitive arrays are a bit special since they do not have inline
-    // descriptors. If Offset != Base, then the pointer already points to
-    // an element and there is nothing to do. Otherwise, the pointer is
-    // adjusted to the first element of the array.
-    if (inPrimitiveArray()) {
-      if (Offset != asBlockPointer().Base)
+      return Pointer(asBlockPointer().Pointee, Base, PastEndMark);
+
+    if (Offset != Base) {
+      // If we're pointing to a primitive array element, there's nothing to do.
+      if (inPrimitiveArray())
         return *this;
-      return Pointer(asBlockPointer().Pointee, asBlockPointer().Base,
-                     Offset + sizeof(InitMapPtr));
+      // Pointer is to a composite array element - enter it.
+      if (Offset != Base)
+        return Pointer(asBlockPointer().Pointee, Offset, Offset);
     }
 
-    // Pointer is to a field or array element - enter it.
-    if (Offset != asBlockPointer().Base)
-      return Pointer(asBlockPointer().Pointee, Offset, Offset);
-
-    // Enter the first element of an array.
-    if (!getFieldDesc()->isArray())
-      return *this;
-
-    const unsigned NewBase = asBlockPointer().Base + sizeof(InlineDescriptor);
-    return Pointer(asBlockPointer().Pointee, NewBase, NewBase);
+    // Otherwise, we're pointing to a non-array element or
+    // are already narrowed to a composite array element. Nothing to do.
+    return *this;
   }
 
   /// Expands a pointer to the containing array, undoing narrowing.
diff --git a/clang/test/AST/ByteCode/arrays.cpp b/clang/test/AST/ByteCode/arrays.cpp
index 2ef0cf886b2dc..8af82163fd815 100644
--- a/clang/test/AST/ByteCode/arrays.cpp
+++ b/clang/test/AST/ByteCode/arrays.cpp
@@ -637,11 +637,93 @@ static_assert(get2() == same_entity_2, "failed to find previous decl");
 
 constexpr int zs[2][2][2][2] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
 constexpr int fail(const int &p) {
-  return (&p)[64]; // both-note {{cannot refer to element 64 of array of 2 elements}}
+  return (&p)[64]; // both-note 2{{cannot refer to element 64 of array of 2 elements}} \
+                   // both-note {{cannot refer to element 65 of array of 2 elements}} \
+                   // both-note {{cannot refer to element 66 of array of 2 elements}}
 }
 static_assert(fail(*(&(&(*(*&(&zs[2] - 1)[0] + 2 - 2))[2])[-1][2] - 2)) == 11, ""); // both-error {{not an integral constant expression}} \
                                                                                     // both-note {{in call to}}
 
+
+static_assert(fail( // both-error {{not an integral constant expression}} \
+                    // both-note {{in call to 'fail(zs[1][1][0][0])'}}
+      *(*(*((*
+  (zs + 1))     /// int[2][2][2]
+      + 1)      /// int[2][2]
+      + 2 - 2)  /// int[2]
+      + 2 - 2)  /// int
+      ));
+
+static_assert(fail( // both-error {{not an integral constant expression}} \
+                    // both-note {{in call to 'fail(zs[1][0][0][1])'}}
+      *(*(*((*
+  (zs + 1))     /// int[2][2][2]
+      + 0)      /// int[2][2]
+      + 2 - 2)  /// int[2]
+      + 1)      /// int
+      ));
+
+static_assert(fail( // both-error {{not an integral constant expression}} \
+                    // both-note {{in call to 'fail(zs[1][0][0][2])'}}
+      *(*(*((*
+  (zs + 1))     /// int[2][2][2]
+      + 0)      /// int[2][2]
+      + 2 - 2)  /// int[2]
+      + 2)      /// int
+      ));
+
+namespace ZeroIndex {
+  constexpr char foo(const char *a) {
+    return a[0];
+  }
+  constexpr const char *f = "abc";
+  static_assert(foo(f + 1) == 'b', "");
+}
+
+namespace MultiDimArrayOffset {
+#define assert(x) (x ? void(0) : __builtin_abort())
+  struct R {
+    int a;
+  };
+
+  template<typename T>
+  class view {
+  public:
+    T* V;
+    T* current;
+
+    constexpr view(T*V) : V(V), current(V) {}
+
+    constexpr void operator+=(unsigned N) {
+      current += N;
+    }
+
+    constexpr auto operator*() {
+      return *current;
+    }
+
+  };
+
+  constexpr int foo() {
+    R buffer[2][4] = {{1, 2, 3, 4}, {5, 6, 7, 8}};
+
+    auto A = buffer;
+    A += 1;
+    assert((**A).a == 5);
+    assert(buffer == buffer + 1 - 1);
+
+    assert(--A+0 == buffer+0);
+
+    view V(buffer);
+    assert(*V == &buffer[0][0]);
+    V += 1;
+    assert(*V == &buffer[1][0]);
+    assert(*(V.current) == &buffer[1][0]);
+    return 1;
+  }
+  static_assert(foo() == 1, "");
+}
+
 namespace ZeroSizeTypes {
   constexpr int (*p1)[0] = 0, (*p2)[0] = 0;
   constexpr int k = p2 - p1; // both-error {{constexpr variable 'k' must be initialized by a constant expression}} \

This issue is very convoluted, but in essence, in the new version:

For a Pointer P that points to the root of a multidimensional,
primitive array:

P.narrow() does nothing.
P.atIndex(0) points to P[0]
P.atIndex(0).atIndex(0) is the same as P.atIndex(0) (as before)
P.atIndex(0).narrow().atIndex(0) points to P[0][0]
@tbaederr tbaederr changed the title [clang][bytecode] Fix various issues with multidimensional arrarys [clang][bytecode] Fix various issues with multidimensional arrars Apr 7, 2025
@tbaederr tbaederr changed the title [clang][bytecode] Fix various issues with multidimensional arrars [clang][bytecode] Fix various issues with multidimensional arrays Apr 7, 2025
@tbaederr tbaederr merged commit bdd0870 into llvm:main Apr 8, 2025
11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants