-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Reland "[clang] Add nuw attribute to GEPs (#105496)" #107257
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Add nuw attribute to inbounds GEPs where the expression used to form the GEP is an addition of unsigned indices. Relands llvm#105496, which was reverted because it exposed a miscompilation arising from llvm#98608. This is now fixed by llvm#106512.
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-clang-codegen Author: Hari Limaye (hazzlim) ChangesAdd nuw attribute to inbounds GEPs where the expression used to form the GEP is an addition of unsigned indices. Relands #105496, which was reverted because it exposed a miscompilation arising from #98608. This is now fixed by #106512. Patch is 582.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107257.diff 91 Files Affected:
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 08730a6a6672a1..b8036cf6e6a306 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -14,6 +14,7 @@
#include "CodeGenTypeCache.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GEPNoWrapFlags.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Type.h"
@@ -334,9 +335,10 @@ class CGBuilderTy : public CGBuilderBaseTy {
Address CreateGEP(Address Addr, ArrayRef<llvm::Value *> IdxList,
llvm::Type *ElementType, CharUnits Align,
- const Twine &Name = "") {
+ const Twine &Name = "",
+ llvm::GEPNoWrapFlags NW = llvm::GEPNoWrapFlags::none()) {
llvm::Value *Ptr = emitRawPointerFromAddress(Addr);
- return RawAddress(CreateGEP(Addr.getElementType(), Ptr, IdxList, Name),
+ return RawAddress(CreateGEP(Addr.getElementType(), Ptr, IdxList, Name, NW),
ElementType, Align);
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 7aa2d3d89c2936..88fbbe6c4c965f 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FixedPointBuilder.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GEPNoWrapFlags.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
@@ -5759,7 +5760,12 @@ CodeGenFunction::EmitCheckedInBoundsGEP(llvm::Type *ElemTy, Value *Ptr,
bool SignedIndices, bool IsSubtraction,
SourceLocation Loc, const Twine &Name) {
llvm::Type *PtrTy = Ptr->getType();
- Value *GEPVal = Builder.CreateInBoundsGEP(ElemTy, Ptr, IdxList, Name);
+
+ llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::inBounds();
+ if (!SignedIndices && !IsSubtraction)
+ NWFlags |= llvm::GEPNoWrapFlags::noUnsignedWrap();
+
+ Value *GEPVal = Builder.CreateGEP(ElemTy, Ptr, IdxList, Name, NWFlags);
// If the pointer overflow sanitizer isn't enabled, do nothing.
if (!SanOpts.has(SanitizerKind::PointerOverflow))
@@ -5874,8 +5880,13 @@ Address CodeGenFunction::EmitCheckedInBoundsGEP(
Address Addr, ArrayRef<Value *> IdxList, llvm::Type *elementType,
bool SignedIndices, bool IsSubtraction, SourceLocation Loc, CharUnits Align,
const Twine &Name) {
- if (!SanOpts.has(SanitizerKind::PointerOverflow))
- return Builder.CreateInBoundsGEP(Addr, IdxList, elementType, Align, Name);
+ if (!SanOpts.has(SanitizerKind::PointerOverflow)) {
+ llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::inBounds();
+ if (!SignedIndices && !IsSubtraction)
+ NWFlags |= llvm::GEPNoWrapFlags::noUnsignedWrap();
+
+ return Builder.CreateGEP(Addr, IdxList, elementType, Align, Name, NWFlags);
+ }
return RawAddress(
EmitCheckedInBoundsGEP(Addr.getElementType(), Addr.emitRawPointer(*this),
diff --git a/clang/test/CodeGen/2005-01-02-ConstantInits.c b/clang/test/CodeGen/2005-01-02-ConstantInits.c
index 7772a64331ffb7..d90c2ea42da613 100644
--- a/clang/test/CodeGen/2005-01-02-ConstantInits.c
+++ b/clang/test/CodeGen/2005-01-02-ConstantInits.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --global-value-regex "@.+"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --global-value-regex "[A-Za-z].*"
// RUN: %clang_cc1 -triple=x86_64-unknown-linux %s -emit-llvm -o - | FileCheck %s
// This tests all kinds of hard cases with initializers and
@@ -51,7 +51,7 @@ int foo(int i) { return bar(&Arr[49])+bar(&Arr[i]); }
// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
// CHECK-NEXT: store ptr @Arr, ptr [[P]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P]], align 8
-// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i32 1
// CHECK-NEXT: store ptr [[INCDEC_PTR]], ptr [[P]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ADDR]], align 4
// CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[TMP1]] to i64
diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
index a3650beec625f2..4c4d0dfce05eaf 100644
--- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
@@ -1012,14 +1012,14 @@ test_shuffle() {
// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
// CHECK: sext i32 %[[AND4]] to i64
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
@@ -1050,7 +1050,7 @@ test_shuffle() {
// CHECK: sext i32 %[[AND4]] to i64
// CHECK-LE: store <2 x i64> <i64 1663540288323457296, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// CHECK-BE: store <2 x i64> <i64 1157726452361532951, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
-// CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
+// CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
// CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
// CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16
@@ -1067,7 +1067,7 @@ test_shuffle() {
// CHECK: sext i32 %[[AND4]] to i64
// CHECK-LE: store <2 x i64> <i64 0, i64 2242261671028070680>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// CHECK-BE: store <2 x i64> <i64 0, i64 1736447835066146335>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
-// CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
+// CHECK-COUNT-4: getelementptr inbounds nuw [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
// CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
void __attribute__((noinline))
diff --git a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
index 95dfd1202f1575..4a15fa9f76ceea 100644
--- a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
@@ -894,16 +894,16 @@ test_shuffle() {
// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
// CHECK: sext i32 %[[AND4]] to i64
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
-// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: getelementptr inbounds nuw [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
@@ -923,14 +923,14 @@ test_shuffle() {
// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
// CHECK: sext i32 %[[AND4]] to i64
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
-// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
+// CHECK: getelementptr inbounds nuw [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3
// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index ab36b6e7720ba3..a06e815737f4e6 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -118,7 +118,7 @@ void test1(struct annotated *p, int index, int val) {
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 2
// SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP2]]
@@ -134,7 +134,7 @@ void test1(struct annotated *p, int index, int val) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP0]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
@@ -142,7 +142,7 @@ void test1(struct annotated *p, int index, int val) {
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// SANITIZE-WITHOUT-ATTR-NEXT: entry:
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
@@ -150,7 +150,7 @@ void test1(struct annotated *p, int index, int val) {
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
@@ -207,7 +207,7 @@ size_t test2_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 4)
@@ -231,7 +231,7 @@ size_t test2_bdos(struct annotated *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITH-ATTR-NEXT: ret void
//
@@ -239,7 +239,7 @@ size_t test2_bdos(struct annotated *p) {
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// SANITIZE-WITHOUT-ATTR-NEXT: entry:
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
@@ -247,7 +247,7 @@ size_t test2_bdos(struct annotated *p) {
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
-// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c
index 39ede01d6e3b83..8a560a47ad1e10 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c
@@ -33,7 +33,7 @@ char *add_unsigned(char *base, unsigned long offset) {
// CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
// CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
// CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
- // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+ // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
// CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
// CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
// CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
index e93dbcb9f647bf..d884993ffb2b30 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
@@ -50,7 +50,7 @@ char *var_var(char *base, unsigned long offset) {
// CHECK-NEXT: store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
// CHECK-NEXT: %[[BASE_RELOADED:.*]] = load ptr, ptr %[[BASE_ADDR]], align 8
// CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
- // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
+ // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]]
// CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
// CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
// CHECK-SANITIZE-NEXT: %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitiz...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG if CI is happy.
Nice - CI seems happy, so I plan to land this later today if no objections. |
This change leads to a miscompilation; I've extracted a somewhat minimal repro case from the referenced issue above, in the comment: zeux/pugixml#629 (comment) The code, when compiled with
... which, as you can imagine, results in a sigsegv :) This seems to be architecture independent as it initially reproduced in Wasm and also confirmed on Arm. I believe that all accesses in this code are in-bounds and no overflow/underflow happens. The code is silent with This can be worked around like so: const char* end = data + length;
for (int i = 1; i <= 4; ++i)
{
uint8_t ch = static_cast<uint8_t>(end[-i]); ... however, I do not think this workaround should be necessary because I believe the original code is valid as is. |
Here's a slightly smaller reproducer that just reads one element from the array. Similarly, this crashes because the compiler generates a load from an absolute #include <stdio.h>
#include <stdint.h>
#include <string.h>
class xml_buffered_writer
{
public:
xml_buffered_writer(): bufsize(0)
{
}
__attribute__((noinline))
void write_string(const char* data)
{
// write the part of the string that fits in the buffer
size_t offset = bufsize;
while (*data && offset < bufcapacity)
buffer[offset++] = *data++;
// write the rest
if (offset < bufcapacity)
{
bufsize = offset;
}
else
{
// backtrack a bit if we have split the codepoint
size_t length = offset - bufsize;
const char* data_back = data - length;
printf("length %d\n", int(length));
printf("last char %c\n", data_back[length-1]);
bufsize = offset;
}
}
enum
{
bufcapacity = 16
};
char buffer[bufcapacity];
size_t bufsize;
};
int main()
{
xml_buffered_writer writer;
writer.write_string("abcdefghijklmnopqrstuvwxyz");
printf("\n");
} |
Reproduced: https://godbolt.org/z/G4jrhdjef . Looks like an instcombine bug. |
Reduced: https://llvm.godbolt.org/z/TPdd9qore I believe the problem is this line:
|
Should be fixed by 940f892. |
Confirming this fixes the original problem on the larger codebase, thanks! |
I quickly glanced through the other uses of setIsInBounds in LLVM; the one in CodeGenPrepare seems suspicious. |
I bisecting this emscripten (wasm32-unknown-emscripten) miscompile to this PR: emscripten-core/emscripten#22794. It looks like llvm is generating a load with a negative office (which is not supported under wasm). There is a reproducer in that but and I used the following commend to detect the bad load:
Here you can see an i64.load instruction with an offset of 4294967280 (which is -16 interpreted as an unsigned int). |
Perhaps yes. Do you know of any way to reduce the input program into something we can try to inspect. The preprocessed C++ file is over 100,000 lines :-/ |
I used
It looks like there is a -16 to ptr cast in there. I've ask the reported to try running with |
ubsan pointer overflow check should catch issues in the source code, I think. If that doesn't work, maybe try opt-bisect-limit to see if it turns up anything useful. |
Add nuw attribute to inbounds GEPs where the expression used to form the GEP is an addition of unsigned indices.
Relands #105496, which was reverted because it exposed a miscompilation arising from #98608. This is now fixed by #106512.