-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[CIR] Upstream pointer arithmetic support #138041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This adds support for explicit pointer arithmetic.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clangir Author: Andy Kaylor (andykaylor) ChangesThis adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values. Full diff: https://github.com/llvm/llvm-project/pull/138041.diff 3 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 5f3bf8b416cde..7d16d70564227 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -185,6 +185,15 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
}
bool isInt(mlir::Type i) { return mlir::isa<cir::IntType>(i); }
+ //
+ // Constant creation helpers
+ // -------------------------
+ //
+ cir::ConstantOp getSInt32(int32_t c, mlir::Location loc) {
+ auto sInt32Ty = getSInt32Ty();
+ return create<cir::ConstantOp>(loc, cir::IntAttr::get(sInt32Ty, c));
+ }
+
// Creates constant nullptr for pointer type ty.
cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) {
assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer());
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index aef5b125a2877..9fc9bbc3e0c93 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -388,9 +388,26 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
// NOTE(CIR): clang calls CreateAdd but folds this to a unary op
value = emitUnaryOp(e, kind, input, /*nsw=*/false);
}
- } else if (isa<PointerType>(type)) {
- cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec pointer");
- return {};
+ } else if (const PointerType *ptr = type->getAs<PointerType>()) {
+ QualType type = ptr->getPointeeType();
+ if (cgf.getContext().getAsVariableArrayType(type)) {
+ // VLA types don't have constant size.
+ cgf.cgm.errorNYI(e->getSourceRange(), "Pointer arithmetic on VLA");
+ return {};
+ } else if (type->isFunctionType()) {
+ // Arithmetic on function pointers (!) is just +-1.
+ cgf.cgm.errorNYI(e->getSourceRange(),
+ "Pointer arithmetic on function pointer");
+ return {};
+ } else {
+ // For everything else, we can just do a simple increment.
+ mlir::Location loc = cgf.getLoc(e->getSourceRange());
+ CIRGenBuilderTy &builder = cgf.getBuilder();
+ int amount = (isInc ? 1 : -1);
+ mlir::Value amt = builder.getSInt32(amount, loc);
+ assert(!cir::MissingFeatures::sanitizers());
+ value = builder.createPtrStride(loc, value, amt);
+ }
} else if (type->isVectorType()) {
cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec vector");
return {};
@@ -1152,8 +1169,72 @@ getUnwidenedIntegerType(const ASTContext &astContext, const Expr *e) {
static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf,
const BinOpInfo &op,
bool isSubtraction) {
- cgf.cgm.errorNYI(op.loc, "pointer arithmetic");
- return {};
+ // Must have binary (not unary) expr here. Unary pointer
+ // increment/decrement doesn't use this path.
+ const BinaryOperator *expr = cast<BinaryOperator>(op.e);
+
+ mlir::Value pointer = op.lhs;
+ Expr *pointerOperand = expr->getLHS();
+ mlir::Value index = op.rhs;
+ Expr *indexOperand = expr->getRHS();
+
+ // In a subtraction, the LHS is always the pointer.
+ if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) {
+ std::swap(pointer, index);
+ std::swap(pointerOperand, indexOperand);
+ }
+
+ // Some versions of glibc and gcc use idioms (particularly in their malloc
+ // routines) that add a pointer-sized integer (known to be a pointer value)
+ // to a null pointer in order to cast the value back to an integer or as
+ // part of a pointer alignment algorithm. This is undefined behavior, but
+ // we'd like to be able to compile programs that use it.
+ //
+ // Normally, we'd generate a GEP with a null-pointer base here in response
+ // to that code, but it's also UB to dereference a pointer created that
+ // way. Instead (as an acknowledged hack to tolerate the idiom) we will
+ // generate a direct cast of the integer value to a pointer.
+ //
+ // The idiom (p = nullptr + N) is not met if any of the following are true:
+ //
+ // The operation is subtraction.
+ // The index is not pointer-sized.
+ // The pointer type is not byte-sized.
+ //
+ if (BinaryOperator::isNullPointerArithmeticExtension(
+ cgf.getContext(), op.opcode, expr->getLHS(), expr->getRHS()))
+ return cgf.getBuilder().createIntToPtr(index, pointer.getType());
+
+ // Differently from LLVM codegen, ABI bits for index sizes is handled during
+ // LLVM lowering.
+
+ // If this is subtraction, negate the index.
+ if (isSubtraction)
+ index = cgf.getBuilder().createNeg(index);
+
+ assert(!cir::MissingFeatures::sanitizers());
+
+ const PointerType *pointerType =
+ pointerOperand->getType()->getAs<PointerType>();
+ if (!pointerType) {
+ cgf.cgm.errorNYI("Objective-C:pointer arithmetic with non-pointer type");
+ return nullptr;
+ }
+
+ QualType elementType = pointerType->getPointeeType();
+ if (cgf.getContext().getAsVariableArrayType(elementType)) {
+ cgf.cgm.errorNYI("variable array type");
+ return nullptr;
+ }
+
+ if (elementType->isVoidType() || elementType->isFunctionType()) {
+ cgf.cgm.errorNYI("void* or function pointer arithmetic");
+ return nullptr;
+ }
+
+ assert(!cir::MissingFeatures::sanitizers());
+ return cgf.getBuilder().create<cir::PtrStrideOp>(
+ cgf.getLoc(op.e->getExprLoc()), pointer.getType(), pointer, index);
}
mlir::Value ScalarExprEmitter::emitMul(const BinOpInfo &ops) {
diff --git a/clang/test/CIR/CodeGen/pointers.cpp b/clang/test/CIR/CodeGen/pointers.cpp
new file mode 100644
index 0000000000000..c791356e15bb6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/pointers.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Should generate basic pointer arithmetics.
+void foo(int *iptr, char *cptr, unsigned ustride) {
+ iptr + 2;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+ cptr + 3;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s8i>
+ iptr - 2;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+ // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
+ cptr - 3;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+ // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s8i>
+ iptr + ustride;
+ // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !u32i), !cir.ptr<!s32i>
+
+ // Must convert unsigned stride to a signed one.
+ iptr - ustride;
+ // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
+ // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i
+ // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
+
+ iptr++;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+
+ iptr--;
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<-1> : !s32i
+ // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+}
+
+void testPointerSubscriptAccess(int *ptr) {
+// CHECK: testPointerSubscriptAccess
+ ptr[1];
+ // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
+ // CHECK: %[[#PTR:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+ // CHECK: cir.ptr_stride(%[[#PTR]] : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
+}
+
+void testPointerMultiDimSubscriptAccess(int **ptr) {
+// CHECK: testPointerMultiDimSubscriptAccess
+ ptr[1][2];
+ // CHECK: %[[#STRIDE2:]] = cir.const #cir.int<2> : !s32i
+ // CHECK: %[[#STRIDE1:]] = cir.const #cir.int<1> : !s32i
+ // CHECK: %[[#PTR1:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>>
+ // CHECK: %[[#PTR2:]] = cir.ptr_stride(%[[#PTR1]] : !cir.ptr<!cir.ptr<!s32i>>, %[[#STRIDE1]] : !s32i), !cir.ptr<!cir.ptr<!s32i>>
+ // CHECK: %[[#PTR3:]] = cir.load %[[#PTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+ // CHECK: cir.ptr_stride(%[[#PTR3]] : !cir.ptr<!s32i>, %[[#STRIDE2]] : !s32i), !cir.ptr<!s32i>
+}
+
+// This test is meant to verify code that handles the 'p = nullptr + n' idiom
+// used by some versions of glibc and gcc. This is undefined behavior but
+// it is intended there to act like a conversion from a pointer-sized integer
+// to a pointer, and we would like to tolerate that.
+
+#define NULLPTRINT ((int*)0)
+
+// This should get the inttoptr instruction.
+int *testGnuNullPtrArithmetic(unsigned n) {
+// CHECK: testGnuNullPtrArithmetic
+ return NULLPTRINT + n;
+ // CHECK: %[[NULLPTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+ // CHECK: %[[N:.*]] = cir.load %{{.*}} : !cir.ptr<!u32i>, !u32i
+ // CHECK: %[[RESULT:.*]] = cir.ptr_stride(%[[NULLPTR]] : !cir.ptr<!s32i>, %[[N]] : !u32i), !cir.ptr<!s32i>
+}
|
// CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !u32i), !cir.ptr<!s32i> | ||
|
||
// Must convert unsigned stride to a signed one. | ||
iptr - ustride; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we also test pointer - pointer
? That is pretty critical for pointer operations as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The ptr_diff
operation hasn't been upstreamed yet.
|
||
mlir::Value pointer = op.lhs; | ||
Expr *pointerOperand = expr->getLHS(); | ||
mlir::Value index = op.rhs; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Calling this 'index' is suspicious, since pointer - pointer
is valid.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CIR has a dedicated ptr_diff
operation so pointer - pointer
won't go through here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah! I hadn't realized that. What makes that not go through here though? (EDIT, I see ScalarExprEmitter::emitSub handles this... the assert would be awesome though).
mlir::Value index = op.rhs; | ||
Expr *indexOperand = expr->getRHS(); | ||
|
||
// In a subtraction, the LHS is always the pointer. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment should probably elaborate.
`In the case of subtraction, the FE has ensured that the LHS is always the pointer. However addition can have the pointer on either side. Since CIR prefers these be normalized to the LHS having a pointer, we swap them to make that true.
|
||
// If this is subtraction, negate the index. | ||
if (isSubtraction) | ||
index = cgf.getBuilder().createNeg(index); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this work with pointers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, as mentioned above, we won't get here with pointers. I should probably put in an assert that index is an integral value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An assert would be awesome.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once comment + assert are in place, I'm happy.
|
||
mlir::Value pointer = op.lhs; | ||
Expr *pointerOperand = expr->getLHS(); | ||
mlir::Value index = op.rhs; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah! I hadn't realized that. What makes that not go through here though? (EDIT, I see ScalarExprEmitter::emitSub handles this... the assert would be awesome though).
|
||
// If this is subtraction, negate the index. | ||
if (isSubtraction) | ||
index = cgf.getBuilder().createNeg(index); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An assert would be awesome.
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm, besides Erich's comments
auto sInt32Ty = getSInt32Ty(); | ||
return create<cir::ConstantOp>(loc, cir::IntAttr::get(sInt32Ty, c)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto sInt32Ty = getSInt32Ty(); | |
return create<cir::ConstantOp>(loc, cir::IntAttr::get(sInt32Ty, c)); | |
return getConstantInt(loc, getSInt32Ty(), c); |
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.