-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][CodeGen] Allow memcpy replace with trivial auto var init #84230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][CodeGen] Allow memcpy replace with trivial auto var init #84230
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang-codegen Author: Antonio Frighetto (antoniofrighetto) Changes…-ftrivial-auto-var-init (#71677)" This reverts commit afe8b93. Fixes regression: #84178. Full diff: https://github.com/llvm/llvm-project/pull/84230.diff 6 Files Affected:
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..46cfd3f10a3fb7 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1244,24 +1244,29 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
// If the initializer is small, use a handful of stores.
if (shouldSplitConstantStore(CGM, ConstantSize)) {
if (auto *STy = dyn_cast<llvm::StructType>(Ty)) {
- const llvm::StructLayout *Layout =
- CGM.getDataLayout().getStructLayout(STy);
- for (unsigned i = 0; i != constant->getNumOperands(); i++) {
- CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i));
- Address EltPtr = Builder.CreateConstInBoundsByteGEP(
- Loc.withElementType(CGM.Int8Ty), CurOff);
- emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
- constant->getAggregateElement(i), IsAutoInit);
+ // FIXME: handle the case when STy != Loc.getElementType().
+ if (STy == Loc.getElementType()) {
+ for (unsigned i = 0; i != constant->getNumOperands(); i++) {
+ Address EltPtr = Builder.CreateStructGEP(Loc, i);
+ emitStoresForConstant(
+ CGM, D, EltPtr, isVolatile, Builder,
+ cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)),
+ IsAutoInit);
+ }
+ return;
}
- return;
} else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) {
- for (unsigned i = 0; i != ATy->getNumElements(); i++) {
- Address EltPtr = Builder.CreateConstGEP(
- Loc.withElementType(ATy->getElementType()), i);
- emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
- constant->getAggregateElement(i), IsAutoInit);
+ // FIXME: handle the case when ATy != Loc.getElementType().
+ if (ATy == Loc.getElementType()) {
+ for (unsigned i = 0; i != ATy->getNumElements(); i++) {
+ Address EltPtr = Builder.CreateConstArrayGEP(Loc, i);
+ emitStoresForConstant(
+ CGM, D, EltPtr, isVolatile, Builder,
+ cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)),
+ IsAutoInit);
+ }
+ return;
}
- return;
}
}
diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index 2886a32974b066..4f393d9e6b7f32 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -134,8 +134,8 @@ void g6() {
f6m(1, 2, 3, 4, 5, s);
}
// CHECK: define{{.*}} void @g6
-// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
-// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 0])
+// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
+// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
// CHECK: declare void @f6(i32 noundef, [4 x i32])
// CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [4 x i32])
}
diff --git a/clang/test/CodeGen/aapcs64-align.cpp b/clang/test/CodeGen/aapcs64-align.cpp
index 759413cbc4b56f..de231f2123b975 100644
--- a/clang/test/CodeGen/aapcs64-align.cpp
+++ b/clang/test/CodeGen/aapcs64-align.cpp
@@ -75,8 +75,8 @@ void g4() {
f4m(1, 2, 3, 4, 5, s);
}
// CHECK: define{{.*}} void @g4()
-// CHECK: call void @f4(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] %{{.*}})
+// CHECK: call void @f4(i32 noundef 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f4m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] [i64 30064771078, i64 0])
// CHECK: declare void @f4(i32 noundef, [2 x i64])
// CHECK: declare void @f4m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i64])
@@ -95,8 +95,8 @@ void f5m(int, int, int, int, int, P16);
f5m(1, 2, 3, 4, 5, s);
}
// CHECK: define{{.*}} void @g5()
-// CHECK: call void @f5(i32 noundef 1, [2 x i64] %{{.*}})
-// CHECK: void @f5m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] %{{.*}})
+// CHECK: call void @f5(i32 noundef 1, [2 x i64] [i64 30064771078, i64 0])
+// CHECK: void @f5m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i64] [i64 30064771078, i64 0])
// CHECK: declare void @f5(i32 noundef, [2 x i64])
// CHECK: declare void @f5m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i64])
diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp
index e5a9d015f22f27..6cb18528ebadcd 100644
--- a/clang/test/CodeGenCXX/auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/auto-var-init.cpp
@@ -89,14 +89,22 @@ struct padded { char c; int i; };
// PATTERN-O1-NOT: @__const.test_paddednullinit_custom.custom
struct paddednullinit { char c = 0; int i = 0; };
// PATTERN-O0: @__const.test_paddedpacked_uninit.uninit = private unnamed_addr constant %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>, align 1
+// PATTERN: @__const.test_paddedpacked_custom.custom = private unnamed_addr constant %struct.paddedpacked <{ i8 42, i32 13371337 }>, align 1
+// ZERO: @__const.test_paddedpacked_custom.custom = private unnamed_addr constant %struct.paddedpacked <{ i8 42, i32 13371337 }>, align 1
struct paddedpacked { char c; int i; } __attribute__((packed));
// PATTERN-O0: @__const.test_paddedpackedarray_uninit.uninit = private unnamed_addr constant %struct.paddedpackedarray { [2 x %struct.paddedpacked] [%struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>, %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>] }, align 1
+// PATTERN: @__const.test_paddedpackedarray_custom.custom = private unnamed_addr constant %struct.paddedpackedarray { [2 x %struct.paddedpacked] [%struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }>] }, align 1
+// ZERO: @__const.test_paddedpackedarray_custom.custom = private unnamed_addr constant %struct.paddedpackedarray { [2 x %struct.paddedpacked] [%struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }>] }, align 1
struct paddedpackedarray { struct paddedpacked p[2]; };
// PATTERN-O0: @__const.test_unpackedinpacked_uninit.uninit = private unnamed_addr constant <{ { i8, [3 x i8], i32 }, i8 }> <{ { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, i8 [[I8]] }>, align 1
struct unpackedinpacked { padded a; char b; } __attribute__((packed));
// PATTERN-O0: @__const.test_paddednested_uninit.uninit = private unnamed_addr constant { { i8, [3 x i8], i32 }, { i8, [3 x i8], i32 } } { { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] } }, align 4
+// PATTERN: @__const.test_paddednested_custom.custom = private unnamed_addr constant { { i8, [3 x i8], i32 }, { i8, [3 x i8], i32 } } { { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, { i8, [3 x i8], i32 } { i8 43, [3 x i8] zeroinitializer, i32 13371338 } }, align 4
+// ZERO: @__const.test_paddednested_custom.custom = private unnamed_addr constant { { i8, [3 x i8], i32 }, { i8, [3 x i8], i32 } } { { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, { i8, [3 x i8], i32 } { i8 43, [3 x i8] zeroinitializer, i32 13371338 } }, align 4
struct paddednested { struct padded p1, p2; };
// PATTERN-O0: @__const.test_paddedpackednested_uninit.uninit = private unnamed_addr constant %struct.paddedpackednested { %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>, %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }> }, align 1
+// PATTERN: @__const.test_paddedpackednested_custom.custom = private unnamed_addr constant %struct.paddedpackednested { %struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }> }, align 1
+// ZERO: @__const.test_paddedpackednested_custom.custom = private unnamed_addr constant %struct.paddedpackednested { %struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }> }, align 1
struct paddedpackednested { struct paddedpacked p1, p2; };
// PATTERN-O0: @__const.test_bitfield_uninit.uninit = private unnamed_addr constant %struct.bitfield { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]" }, align 4
// PATTERN-O0: @__const.test_bitfield_custom.custom = private unnamed_addr constant %struct.bitfield { i8 20, [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]" }, align 4
@@ -134,8 +142,12 @@ struct arraytail { int i; int arr[]; };
// PATTERN-O1-NOT: @__const.test_bool4_custom.custom
// ZERO-O1-NOT: @__const.test_bool4_custom.custom
+// PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x ptr] [ptr inttoptr ([[IPTRT]] 572662306 to ptr), ptr inttoptr ([[IPTRT]] 572662306 to ptr), ptr inttoptr ([[IPTRT]] 572662306 to ptr), ptr inttoptr ([[IPTRT]] 572662306 to ptr)], align
+// ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x ptr] [ptr inttoptr (i64 572662306 to ptr), ptr inttoptr (i64 572662306 to ptr), ptr inttoptr (i64 572662306 to ptr), ptr inttoptr (i64 572662306 to ptr)], align 16
// PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }, { i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }, { i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }, { i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }], align
// PATTERN-O1-NOT: @__const.test_tailpad4_uninit.uninit
+// PATTERN: @__const.test_tailpad4_custom.custom = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }], align
+// ZERO: @__const.test_tailpad4_custom.custom = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }], align 16
struct tailpad { short s; char c; };
// PATTERN-O0: @__const.test_atomicnotlockfree_uninit.uninit = private unnamed_addr constant %struct.notlockfree { [4 x i64] {{\[}}i64 [[I64]], i64 [[I64]], i64 [[I64]], i64 [[I64]]] }, align
// PATTERN-O1-NOT: @__const.test_atomicnotlockfree_uninit.uninit
@@ -702,8 +714,7 @@ TEST_UNINIT(padded, padded);
// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit)
// PATTERN-LABEL: @test_padded_uninit()
// PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_padded_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]]
-// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8
-// PATTERN-O1-NOT: !annotation
+// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8, !annotation [[AUTO_INIT]]
// ZERO-LABEL: @test_padded_uninit()
// ZERO-O0: call void @llvm.memset{{.*}}, i8 0,{{.+}}), !annotation [[AUTO_INIT]]
// ZERO-O1: store i64 0, ptr %uninit, align 8, !annotation [[AUTO_INIT]]
@@ -729,8 +740,7 @@ TEST_UNINIT(paddednullinit, paddednullinit);
// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit)
// PATTERN-LABEL: @test_paddednullinit_uninit()
// PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddednullinit_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]]
-// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8
-// PATTERN-O1-NOT: !annotation
+// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8, !annotation [[AUTO_INIT]]
// ZERO-LABEL: @test_paddednullinit_uninit()
// ZERO-O0: call void @llvm.memset{{.*}}, i8 0, {{.*}}, !annotation [[AUTO_INIT]]
// ZERO-O1: store i64 0, ptr %uninit, align 8
@@ -768,8 +778,9 @@ TEST_UNINIT(paddedpacked, paddedpacked);
// PATTERN-LABEL: @test_paddedpacked_uninit()
// PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddedpacked_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]]
// PATTERN-O1: store i8 [[I8]], ptr %uninit, align {{.+}}, !annotation [[AUTO_INIT]]
-// PATTERN-O1: %[[I:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 1
+// PATTERN-O1: %[[I:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 0, i32 1
// PATTERN-O1: store i32 [[I32]], ptr %[[I]], align {{.+}}, !annotation [[AUTO_INIT]]
+
// ZERO-LABEL: @test_paddedpacked_uninit()
// ZERO: call void @llvm.memset{{.*}}, i8 0,{{.+}}), !annotation [[AUTO_INIT]]
@@ -1181,8 +1192,7 @@ TEST_UNINIT(atomicpadded, _Atomic(padded));
// CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit)
// PATTERN-LABEL: @test_atomicpadded_uninit()
// PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicpadded_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]]
-// PATTERN-O1: store i64 [[IPTR]], ptr %uninit, align 8
-// PATTERN-O1-NOT: !annotation
+// PATTERN-O1: store i64 [[IPTR]], ptr %uninit, align 8, !annotation [[AUTO_INIT]]
// ZERO-LABEL: @test_atomicpadded_uninit()
// ZERO-O0: call void @llvm.memset{{.*}}, i8 0, {{.+}}), !annotation [[AUTO_INIT]]
// ZERO-O1: store i64 0, ptr %uninit, align 8, !annotation [[AUTO_INIT]]
@@ -1204,7 +1214,8 @@ TEST_UNINIT(complexfloat, _Complex float);
// PATTERN-LABEL: @test_complexfloat_uninit()
// PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_complexfloat_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]]
// PATTERN-O1: store float 0xFFFFFFFFE0000000, ptr %uninit, align {{.+}}, !annotation [[AUTO_INIT]]
-// PATTERN-O1: %[[F2:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 4
+
+// PATTERN-O1: %[[F2:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 0, i32 1
// PATTERN-O1: store float 0xFFFFFFFFE0000000, ptr %[[F2]], align {{.+}}, !annotation [[AUTO_INIT]]
// ZERO-LABEL: @test_complexfloat_uninit()
@@ -1303,9 +1314,7 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 0x44444444 });
// CHECK-O0: call void @llvm.memcpy
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
-// CHECK-O1: store i32 1145324612, ptr %custom, align 4
-// CHECK-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 4
-// CHECK-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4
+// CHECK-O1: store i64 4919131752989213764, ptr %custom, align 8
// CHECK-NOT: !annotation
TEST_UNINIT(semivolatileinit, semivolatileinit);
@@ -1418,7 +1427,7 @@ TEST_CUSTOM(matching, matching, { .f = 0xf00f });
// CHECK-O0: call void @llvm.memcpy
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
-// CHECK-O1: store float 6.145500e+04, ptr {{.*}}, align 4
+// CHECK-O1: store i32 1198526208, ptr {{.*}}, align 4
// CHECK-NOT: !annotation
TEST_UNINIT(matchingreverse, matchingreverse);
@@ -1497,16 +1506,8 @@ TEST_CUSTOM(unmatchedreverse, unmatchedreverse, { .c = 42 });
// CHECK-O0: call void @llvm.memcpy
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
-// PATTERN-O1: store i8 42, ptr {{.*}}, align 4
-// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1
-// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}}
-// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 2
-// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}}
-// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 3
-// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}}
-// ZERO-O1: store i8 42, ptr {{.*}}, align 4
-// ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1
-// ZERO-O1-NEXT: call void @llvm.memset.{{.*}}({{.*}}, i8 0, i64 3, {{.*}})
+// PATTERN-O1: store i32 -1431655894, ptr {{.*}}, align 4
+// ZERO-O1: store i32 42, ptr {{.*}}, align 4
TEST_UNINIT(unmatchedfp, unmatchedfp);
// CHECK-LABEL: @test_unmatchedfp_uninit()
@@ -1531,7 +1532,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 3.1415926535897932384626433 });
// CHECK-O0: call void @llvm.memcpy
// CHECK-NOT: !annotation
// CHECK-O0: call void @{{.*}}used{{.*}}%custom)
-// CHECK-O1: store double 0x400921FB54442D18, ptr %custom, align 8
+// CHECK-O1: store i64 4614256656552045848, ptr %custom, align 8
// CHECK-NOT: !annotation
TEST_UNINIT(emptyenum, emptyenum);
diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
index 6c84485b66b4a0..edf6dbf8657cbe 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-printf.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
@@ -30,14 +30,7 @@ __kernel void test_printf_int(int i) {
// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
// CHECK-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: [[LOC0:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 0
-// CHECK-NEXT: store i8 102, ptr addrspace(5) [[LOC0]], align 1
-// CHECK-NEXT: [[LOC1:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 1
-// CHECK-NEXT: store i8 111, ptr addrspace(5) [[LOC1]], align 1
-// CHECK-NEXT: [[LOC2:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 2
-// CHECK-NEXT: store i8 111, ptr addrspace(5) [[LOC2]], align 1
-// CHECK-NEXT: [[LOC3:%.*]] = getelementptr i8, ptr addrspace(5) [[S]], i64 3
-// CHECK-NEXT: store i8 0, ptr addrspace(5) [[LOC3]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false)
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]]
diff --git a/clang/test/OpenMP/bug54082.c b/clang/test/OpenMP/bug54082.c
index b88b68fd43012a..337c120983e0a3 100644
--- a/clang/test/OpenMP/bug54082.c
+++ b/clang/test/OpenMP/bug54082.c
@@ -69,9 +69,7 @@ void foo() {
// CHECK-NEXT: [[X_TRAITS:%.*]] = alloca [1 x %struct.omp_alloctrait_t], align 16
// CHECK-NEXT: [[X_ALLOC:%.*]] = alloca i64, align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[X_TRAITS]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: store i32 2, ptr [[X_TRAITS]], align 16
-// CHECK-NEXT: [[LOC0:%.*]] = getelementptr inbounds i8, ptr [[X_TRAITS]], i64 8
-// CHECK-NEXT: store i64 64, ptr [[LOC0]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) [[X_TRAITS]], ptr noundef nonnull align 16 dereferenceable(16) @__const.foo.x_traits, i64 16, i1 false)
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[X_ALLOC]]) #[[ATTR5]]
// CHECK-NEXT: [[CALL:%.*]] = call i64 @omp_init_allocator(i64 noundef 0, i32 noundef 1, ptr noundef nonnull [[X_TRAITS]]) #[[ATTR5]]
// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
Can you please provide more information on what the regression was / how this fixes it? |
I'm not exactly sure if this is meant to occur with |
bb22ecc
to
91ca7b2
Compare
I think manually checking if |
91ca7b2
to
e5af3e3
Compare
Fixed |
Will there be a backport for 18.1.x? (The current patch doesn't apply cleanly to 18.1.0.) |
You should update the patch summary since this is no longer a revert. |
This change also "fixes" #78034 in the sense that we return to the clang 17 status quo of the emitted IR being wrong but mostly working out in practice. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is clearly not what we want long-term. Either clang or LLVM should have better heuristics based on the actual values being stored; the syntax used at the C level shouldn't affect codegen.
That said, reverting the change with a carveout for the trivial var init is probably reasonable as a temporary step, if we want something we can cherry-pick for 18.x.
Please add a better comment explaining the history here; the current comment isn't really sufficient to explain why we're using heuristics which don't really make sense.
869f015
to
3c006a4
Compare
@efriedma-quic, comment updated, thanks. |
@antoniofrighetto You need to update the PR description, not the commit message. |
@nikic, updated PR description as well, thanks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the changes here make sense (keeping in mind what @efriedma-quic says about long-term goals vs 18.x practicality). Eli, @serge-sans-paille, are you okay with the changes as well? (Please don't land on my approval without waiting a bit for Eli and Serge.)
If so, I'd like to get this backported into 18.x as well because of the issues we're seeing in the wild. Any disagreement?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LGTM too. It's a bit sad that we couldn't make the optimization part of LLVM better instead of generating ad-hoc pattern, but as I couldn't find the time to work more on this, I'm not going to complain! |
3c006a4
to
bfc29a3
Compare
When emitting the storage (or memory copy operations) for constant initializers, the decision whether to split a constant structure or array store into a sequence of field stores or to use `memcpy` is based upon the optimization level and the size of the initializer. In afe8b93, we extended this by allowing constants to be split when the array (or struct) type does not match the type of data the address to the object (constant) is expected to contain. This may happen when `emitStoresForConstant` is called by `EmitAutoVarInit`, as the element type of the address gets shrunk. When this occurs, let the initializer be split into a bunch of stores only under `-ftrivial-auto-var-init=pattern`. Fixes: llvm#84178.
bfc29a3
to
b433076
Compare
Do we need to backport this to release/18.x? |
When emitting the storage (or memory copy operations) for constant initializers, the decision whether to split a constant structure or array store into a sequence of field stores or to use
memcpy
is based upon the optimization level and the size of the initializer. In afe8b93, we extended this by allowing constants to be split when the array (or struct) type does not match the type of data the address to the object (constant) is expected to contain. This may happen whenemitStoresForConstant
is called byEmitAutoVarInit
, as the element type of the address gets shrunk. When this occurs, let the initializer be split into a bunch of stores only under-ftrivial-auto-var-init=pattern
.Fixes regression: #84178.