[CIR] Upstream ArraySubscriptExpr for fixed size array

AmrDeveloper · AmrDeveloper · commit 81e1cd76594d · 2025-04-06T14:32:37.000+02:00
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
@@ -108,6 +108,7 @@ struct MissingFeatures {
   static bool cgFPOptionsRAII() { return false; }
   static bool metaDataNode() { return false; }
   static bool fastMathFlags() { return false; }
+  static bool emitCheckedInBoundsGEP() { return false; }
 
   // Missing types
   static bool dataMemberType() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenBuilder.cpp
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenBuilder.h"
+
+using namespace clang::CIRGen;
+
+mlir::Value CIRGenBuilderTy::maybeBuildArrayDecay(mlir::Location loc,
+                                                  mlir::Value arrayPtr,
+                                                  mlir::Type eltTy) {
+  const auto arrayPtrTy = mlir::cast<cir::PointerType>(arrayPtr.getType());
+  const auto arrayTy = mlir::dyn_cast<cir::ArrayType>(arrayPtrTy.getPointee());
+
+  if (arrayTy) {
+    const cir::PointerType flatPtrTy = getPointerTo(arrayTy.getEltType());
+    return create<cir::CastOp>(loc, flatPtrTy, cir::CastKind::array_to_ptrdecay,
+                               arrayPtr);
+  }
+
+  assert(arrayPtrTy.getPointee() == eltTy &&
+         "flat pointee type must match original array element type");
+  return arrayPtr;
+}
+
+mlir::Value CIRGenBuilderTy::getArrayElement(mlir::Location arrayLocBegin,
+                                             mlir::Location arrayLocEnd,
+                                             mlir::Value arrayPtr,
+                                             mlir::Type eltTy, mlir::Value idx,
+                                             bool shouldDecay) {
+  mlir::Value basePtr = arrayPtr;
+  if (shouldDecay)
+    basePtr = maybeBuildArrayDecay(arrayLocBegin, arrayPtr, eltTy);
+  const mlir::Type flatPtrTy = basePtr.getType();
+  return create<cir::PtrStrideOp>(arrayLocEnd, flatPtrTy, basePtr, idx);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -198,6 +198,19 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
 
     return create<cir::BinOp>(loc, cir::BinOpKind::Div, lhs, rhs);
   }
+
+  /// Create a cir.ptr_stride operation to get access to an array element.
+  /// idx is the index of the element to access, shouldDecay is true if the
+  /// result should decay to a pointer to the element type.
+  mlir::Value getArrayElement(mlir::Location arrayLocBegin,
+                              mlir::Location arrayLocEnd, mlir::Value arrayPtr,
+                              mlir::Type eltTy, mlir::Value idx,
+                              bool shouldDecay);
+
+  /// Returns a decayed pointer to the first element of the array
+  /// pointed to by arrayPtr.
+  mlir::Value maybeBuildArrayDecay(mlir::Location loc, mlir::Value arrayPtr,
+                                   mlir::Type eltTy);
 };
 
 } // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -12,6 +12,7 @@
 
 #include "Address.h"
 #include "CIRGenFunction.h"
+#include "CIRGenModule.h"
 #include "CIRGenValue.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "clang/AST/Attr.h"
@@ -232,6 +233,161 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *e) {
   llvm_unreachable("Unknown unary operator kind!");
 }
 
+/// If the specified expr is a simple decay from an array to pointer,
+/// return the array subexpression.
+/// FIXME: this could be abstracted into a commeon AST helper.
+static const Expr *isSimpleArrayDecayOperand(const Expr *e) {
+  // If this isn't just an array->pointer decay, bail out.
+  const auto *castExpr = dyn_cast<CastExpr>(e);
+  if (!castExpr || castExpr->getCastKind() != CK_ArrayToPointerDecay)
+    return nullptr;
+
+  // If this is a decay from variable width array, bail out.
+  const Expr *subExpr = castExpr->getSubExpr();
+  if (subExpr->getType()->isVariableArrayType())
+    return nullptr;
+
+  return subExpr;
+}
+
+static mlir::IntegerAttr getConstantIndexOrNull(mlir::Value idx) {
+  // TODO(cir): should we consider using MLIRs IndexType instead of IntegerAttr?
+  if (auto constantOp = dyn_cast<cir::ConstantOp>(idx.getDefiningOp()))
+    return mlir::dyn_cast<mlir::IntegerAttr>(constantOp.getValue());
+  return {};
+}
+
+static CharUnits getArrayElementAlign(CharUnits arrayAlign, mlir::Value idx,
+                                      CharUnits eltSize) {
+  // If we have a constant index, we can use the exact offset of the
+  // element we're accessing.
+  const mlir::IntegerAttr constantIdx = getConstantIndexOrNull(idx);
+  if (constantIdx) {
+    const CharUnits offset = constantIdx.getValue().getZExtValue() * eltSize;
+    return arrayAlign.alignmentAtOffset(offset);
+  }
+  // Otherwise, use the worst-case alignment for any element.
+  return arrayAlign.alignmentOfArrayElement(eltSize);
+}
+
+static QualType getFixedSizeElementType(const ASTContext &astContext,
+                                        const VariableArrayType *vla) {
+  QualType eltType;
+  do {
+    eltType = vla->getElementType();
+  } while ((vla = astContext.getAsVariableArrayType(eltType)));
+  return eltType;
+}
+
+static mlir::Value
+emitArraySubscriptPtr(CIRGenFunction &cgf, mlir::Location beginLoc,
+                      mlir::Location endLoc, mlir::Value ptr, mlir::Type eltTy,
+                      ArrayRef<mlir::Value> indices, bool inbounds,
+                      bool signedIndices, bool shouldDecay,
+                      const llvm::Twine &name = "arrayidx") {
+  if (indices.size() > 1) {
+    cgf.cgm.errorNYI("emitArraySubscriptPtr: handle multiple indices");
+    return {};
+  }
+
+  const mlir::Value idx = indices.back();
+  CIRGenModule &cgm = cgf.getCIRGenModule();
+  // TODO(cir): LLVM codegen emits in bound gep check here, is there anything
+  // that would enhance tracking this later in CIR?
+  if (inbounds)
+    assert(!cir::MissingFeatures::emitCheckedInBoundsGEP() && "NYI");
+  return cgm.getBuilder().getArrayElement(beginLoc, endLoc, ptr, eltTy, idx,
+                                          shouldDecay);
+}
+
+static Address emitArraySubscriptPtr(
+    CIRGenFunction &cgf, mlir::Location beginLoc, mlir::Location endLoc,
+    Address addr, ArrayRef<mlir::Value> indices, QualType eltType,
+    bool inbounds, bool signedIndices, mlir::Location loc, bool shouldDecay,
+    QualType *arrayType = nullptr, const Expr *base = nullptr,
+    const llvm::Twine &name = "arrayidx") {
+
+  // Determine the element size of the statically-sized base.  This is
+  // the thing that the indices are expressed in terms of.
+  if (const VariableArrayType *vla =
+          cgf.getContext().getAsVariableArrayType(eltType)) {
+    eltType = getFixedSizeElementType(cgf.getContext(), vla);
+  }
+
+  // We can use that to compute the best alignment of the element.
+  const CharUnits eltSize = cgf.getContext().getTypeSizeInChars(eltType);
+  const CharUnits eltAlign =
+      getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize);
+
+  mlir::Value eltPtr;
+  const mlir::IntegerAttr lastIndex = getConstantIndexOrNull(indices.back());
+  if (!lastIndex) {
+    eltPtr = emitArraySubscriptPtr(cgf, beginLoc, endLoc, addr.getPointer(),
+                                   addr.getElementType(), indices, inbounds,
+                                   signedIndices, shouldDecay, name);
+  }
+  const mlir::Type elementType = cgf.convertTypeForMem(eltType);
+  return Address(eltPtr, elementType, eltAlign);
+}
+
+LValue
+CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
+  if (e->getBase()->getType()->isVectorType() &&
+      !isa<ExtVectorElementExpr>(e->getBase())) {
+    cgm.errorNYI(e->getSourceRange(), "emitArraySubscriptExpr: VectorType");
+    return {};
+  }
+
+  if (isa<ExtVectorElementExpr>(e->getBase())) {
+    cgm.errorNYI(e->getSourceRange(),
+                 "emitArraySubscriptExpr: ExtVectorElementExpr");
+    return {};
+  }
+
+  if (getContext().getAsVariableArrayType(e->getType())) {
+    cgm.errorNYI(e->getSourceRange(),
+                 "emitArraySubscriptExpr: VariableArrayType");
+    return {};
+  }
+
+  if (e->getType()->getAs<ObjCObjectType>()) {
+    cgm.errorNYI(e->getSourceRange(), "emitArraySubscriptExpr: ObjCObjectType");
+    return {};
+  }
+
+  // The index must always be an integer, which is not an aggregate.  Emit it
+  // in lexical order (this complexity is, sadly, required by C++17).
+  assert((e->getIdx() == e->getLHS() || e->getIdx() == e->getRHS()) &&
+         "index was neither LHS nor RHS");
+  const mlir::Value idx = emitScalarExpr(e->getIdx());
+  const QualType idxTy = e->getIdx()->getType();
+  const bool signedIndices = idxTy->isSignedIntegerOrEnumerationType();
+
+  if (const Expr *array = isSimpleArrayDecayOperand(e->getBase())) {
+    LValue arrayLV;
+    if (const auto *ase = dyn_cast<ArraySubscriptExpr>(array))
+      arrayLV = emitArraySubscriptExpr(ase);
+    else
+      arrayLV = emitLValue(array);
+
+    // Propagate the alignment from the array itself to the result.
+    QualType arrayType = array->getType();
+    const Address addr = emitArraySubscriptPtr(
+        *this, cgm.getLoc(array->getBeginLoc()), cgm.getLoc(array->getEndLoc()),
+        arrayLV.getAddress(), {idx}, e->getType(),
+        !getLangOpts().isSignedOverflowDefined(), signedIndices,
+        cgm.getLoc(e->getExprLoc()), /*shouldDecay=*/true, &arrayType,
+        e->getBase());
+
+    return LValue::makeAddr(addr, e->getType());
+  }
+
+  // The base must be a pointer; emit it with an estimate of its alignment.
+  cgm.errorNYI(e->getSourceRange(),
+               "emitArraySubscriptExpr: The base must be a pointer");
+  return {};
+}
+
 LValue CIRGenFunction::emitBinaryOperatorLValue(const BinaryOperator *e) {
   // Comma expressions just emit their LHS then their RHS as an l-value.
   if (e->getOpcode() == BO_Comma) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -157,6 +157,15 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
 
   mlir::Value VisitCastExpr(CastExpr *e);
 
+  mlir::Value VisitArraySubscriptExpr(ArraySubscriptExpr *e) {
+    if (e->getBase()->getType()->isVectorType()) {
+      assert(!cir::MissingFeatures::scalableVectors() &&
+             "NYI: index into scalable vector");
+    }
+    // Just load the lvalue formed by the subscript expression.
+    return emitLoadOfLValue(e);
+  }
+
   mlir::Value VisitExplicitCastExpr(ExplicitCastExpr *e) {
     return VisitCastExpr(e);
   }
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -444,6 +444,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
                                std::string("l-value not implemented for '") +
                                    e->getStmtClassName() + "'");
     return LValue();
+  case Expr::ArraySubscriptExprClass:
+    return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
   case Expr::UnaryOperatorClass:
     return emitUnaryOpLValue(cast<UnaryOperator>(e));
   case Expr::BinaryOperatorClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -387,6 +387,8 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// should be returned.
   RValue emitAnyExpr(const clang::Expr *e);
 
+  LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e);
+
   AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d);
 
   /// Emit code and set up symbol table for a variable declaration with auto,
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -8,6 +8,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_clang_library(clangCIR
   CIRGenerator.cpp
+  CIRGenBuilder.cpp
   CIRGenDecl.cpp
   CIRGenExpr.cpp
   CIRGenExprAggregate.cpp
diff --git a/clang/test/CIR/CodeGen/array.cpp b/clang/test/CIR/CodeGen/array.cpp
@@ -29,8 +29,15 @@ int f[5] = {1, 2};
 
 void func() {
   int arr[10];
-
   // CHECK: %[[ARR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["arr"]
+
+  int e = arr[1];
+  // CHECK: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
+  // CHECK: %[[IDX:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 10>>), !cir.ptr<!s32i>
+  // CHECK: %[[ELE_PTR:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!s32i>, %[[IDX]] : !s32i), !cir.ptr<!s32i>
+  // CHECK: %[[TMP:.*]] = cir.load %[[ELE_PTR]] : !cir.ptr<!s32i>, !s32i
+  // CHECK" cir.store %[[TMP]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
 }
 
 void func2() {
@@ -69,6 +76,7 @@ void func4() {
   int arr[2][1] = {{5}, {6}};
 
   // CHECK: %[[ARR:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>, ["arr", init]
+  // CHECK: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
   // CHECK: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>), !cir.ptr<!cir.array<!s32i x 1>>
   // CHECK: %[[ARR_0_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
   // CHECK: %[[V_0_0:.*]] = cir.const #cir.int<5> : !s32i
@@ -78,6 +86,17 @@ void func4() {
   // CHECK: %[[ARR_1_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
   // CHECK: %[[V_1_0:.*]] = cir.const #cir.int<6> : !s32i
   // CHECK: cir.store %[[V_1_0]], %[[ARR_1_PTR]] : !s32i, !cir.ptr<!s32i>
+
+  int e = arr[1][0];
+
+  // CHECK: %[[IDX:.*]] = cir.const #cir.int<0> : !s32i
+  // CHECK: %[[IDX_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>), !cir.ptr<!cir.array<!s32i x 1>>
+  // CHECK: %[[ARR_1:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, %[[IDX_1]] : !s32i), !cir.ptr<!cir.array<!s32i x 1>>
+  // CHECK: %[[ARR_1_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
+  // CHECK: %[[ELE_0:.*]] = cir.ptr_stride(%[[ARR_1_PTR]] : !cir.ptr<!s32i>, %[[IDX]] : !s32i), !cir.ptr<!s32i>
+  // CHECK: %[[TMP:.*]] = cir.load %[[ELE_0]] : !cir.ptr<!s32i>, !s32i
+  // CHECK: cir.store %[[TMP]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
 }
 
 void func5() {
diff --git a/clang/test/CIR/Lowering/array.cpp b/clang/test/CIR/Lowering/array.cpp
@@ -31,12 +31,18 @@ int f[5] = {1, 2};
 
 void func() {
   int arr[10];
+  int e = arr[1];
 }
 // CHECK: define void @func()
-// CHECK-NEXT: alloca [10 x i32], i64 1, align 16
+// CHECK-NEXT: %[[ARR_ALLOCA:.*]] = alloca [10 x i32], i64 1, align 16
+// CHECK-NEXT: %[[INIT:.*]] = alloca i32, i64 1, align 4
+// CHECK-NEXT: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR_ALLOCA]], i32 0
+// CHECK-NEXT: %[[ELE_PTR:.*]] = getelementptr i32, ptr %[[ARR_PTR]], i64 1
+// CHECK-NEXT: %[[TMP:.*]] = load i32, ptr %[[ELE_PTR]], align 4
+// CHECK-NEXT: store i32 %[[TMP]], ptr %[[INIT]], align 4
 
 void func2() {
-  int arr2[2] = {5};
+  int arr[2] = {5};
 }
 // CHECK: define void @func2()
 // CHECK:  %[[ARR_ALLOCA:.*]] = alloca [2 x i32], i64 1, align 4
@@ -61,19 +67,27 @@ void func3() {
 // CHECK:  store i32 6, ptr %[[ELE_1_PTR]], align 4
 
 void func4() {
-  int arr4[2][1] = {{5}, {6}};
+  int arr[2][1] = {{5}, {6}};
+  int e = arr[1][0];
 }
 // CHECK: define void @func4()
 // CHECK:  %[[ARR_ALLOCA:.*]] = alloca [2 x [1 x i32]], i64 1, align 4
-// CHECK:  %[[ARR_0:.*]] = getelementptr [1 x i32], ptr %[[ARR_ALLOCA]], i32 0
-// CHECK:  %[[ARR_0_ELE_0:.*]] = getelementptr i32, ptr %[[ARR_0]], i32 0
-// CHECK:  store i32 5, ptr %[[ARR_0_ELE_0]], align 4
-// CHECK:  %[[ARR_1:.*]] = getelementptr [1 x i32], ptr %2, i64 1
-// CHECK:  %[[ARR_0_ELE_0:.*]] = getelementptr i32, ptr %[[ARR_1]], i32 0
-// CHECK:  store i32 6, ptr %[[ARR_0_ELE_0]], align 4
+// CHECK:  %[[INIT:.*]] = alloca i32, i64 1, align 4
+// CHECK:  %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR_ALLOCA]], i32 0
+// CHECK:  %[[ARR_0_0:.*]] = getelementptr i32, ptr %[[ARR_PTR]], i32 0
+// CHECK:  store i32 5, ptr %[[ARR_0_0]], align 4
+// CHECK:  %[[ARR_1:.*]] = getelementptr [1 x i32], ptr %[[ARR_PTR]], i64 1
+// CHECK:  %[[ARR_1_0:.*]] = getelementptr i32, ptr %[[ARR_1]], i32 0
+// CHECK:  store i32 6, ptr %[[ARR_1_0]], align 4
+// CHECK:  %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR_ALLOCA]], i32 0
+// CHECK:  %[[ARR_1:.*]] = getelementptr [1 x i32], ptr %[[ARR_PTR]], i64 1
+// CHECK:  %[[ARR_1_0:.*]] = getelementptr i32, ptr %[[ARR_1]], i32 0
+// CHECK:  %[[ELE_PTR:.*]] = getelementptr i32, ptr %[[ARR_1_0]], i64 0
+// CHECK:  %[[TMP:.*]] = load i32, ptr %[[ELE_PTR]], align 4
+// CHECK:  store i32 %[[TMP]], ptr %[[INIT]], align 4
 
 void func5() {
-  int arr5[2][1] = {{5}};
+  int arr[2][1] = {{5}};
 }
 // CHECK: define void @func5()
 // CHECK:  %[[ARR_ALLOCA:.*]] = alloca [2 x [1 x i32]], i64 1, align 4