Skip to content

Commit ca05fb6

Browse files
committed
[Clang][CodeGen] Add metadata for load from reference
1 parent f0fad9f commit ca05fb6

File tree

52 files changed

+8824
-8794
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+8824
-8794
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ C++ Specific Potentially Breaking Changes
7676
7777
To fix this, update libstdc++ to version 14.1.1 or greater.
7878

79+
- Added ``!dereferenceable``, ``!nonnull`` and ``!align`` metadata to loads of references for better codegen.
80+
7981
ABI Changes in This Version
8082
---------------------------
8183
- Fixed Microsoft name mangling of implicitly defined variables used for thread

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2799,9 +2799,37 @@ CodeGenFunction::EmitLoadOfReference(LValue RefLVal,
27992799
llvm::LoadInst *Load =
28002800
Builder.CreateLoad(RefLVal.getAddress(), RefLVal.isVolatile());
28012801
CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo());
2802-
return makeNaturalAddressForPointer(Load, RefLVal.getType()->getPointeeType(),
2803-
CharUnits(), /*ForPointeeType=*/true,
2804-
PointeeBaseInfo, PointeeTBAAInfo);
2802+
QualType PTy = RefLVal.getType()->getPointeeType();
2803+
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
2804+
llvm::LLVMContext &Ctx = getLLVMContext();
2805+
llvm::MDBuilder MDB(Ctx);
2806+
// Emit !dereferenceable metadata
2807+
Load->setMetadata(
2808+
llvm::LLVMContext::MD_dereferenceable,
2809+
llvm::MDNode::get(Ctx,
2810+
MDB.createConstant(llvm::ConstantInt::get(
2811+
Builder.getInt64Ty(),
2812+
CGM.getMinimumObjectSize(PTy).getQuantity()))));
2813+
// Emit !nonnull metadata
2814+
if (CGM.getTypes().getTargetAddressSpace(PTy) == 0 &&
2815+
!CGM.getCodeGenOpts().NullPointerIsValid)
2816+
Load->setMetadata(llvm::LLVMContext::MD_nonnull,
2817+
llvm::MDNode::get(Ctx, {}));
2818+
// Emit !align metadata
2819+
if (PTy->isObjectType()) {
2820+
auto Align =
2821+
CGM.getNaturalPointeeTypeAlignment(RefLVal.getType()).getQuantity();
2822+
if (Align > 1) {
2823+
Load->setMetadata(
2824+
llvm::LLVMContext::MD_align,
2825+
llvm::MDNode::get(Ctx, MDB.createConstant(llvm::ConstantInt::get(
2826+
Builder.getInt64Ty(), Align))));
2827+
}
2828+
}
2829+
}
2830+
return makeNaturalAddressForPointer(Load, PTy, CharUnits(),
2831+
/*ForPointeeType=*/true, PointeeBaseInfo,
2832+
PointeeTBAAInfo);
28052833
}
28062834

28072835
LValue CodeGenFunction::EmitLoadOfReferenceLValue(LValue RefLVal) {

clang/test/CodeGenCXX/matrix-type-operators.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,11 @@ void insert(MyMatrix<EltTy, Rows, Columns> &Mat, EltTy e, unsigned i, unsigned j
215215

216216
void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i, unsigned j) {
217217
// CHECK-LABEL: @_Z21test_insert_template1R8MyMatrixIjLj2ELj2EEjjj(
218-
// NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
218+
// NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{.*}}
219219
// NOOPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}}
220220
// NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}}
221221
// NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}}
222-
// OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
222+
// OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{.*}}
223223
// OPT-NEXT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
224224
// OPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
225225
// OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
@@ -249,9 +249,9 @@ void test_insert_template1(MyMatrix<unsigned, 2, 2> &Mat, unsigned e, unsigned i
249249

250250
void test_insert_template2(MyMatrix<float, 3, 8> &Mat, float e) {
251251
// CHECK-LABEL: @_Z21test_insert_template2R8MyMatrixIfLj3ELj8EEf(
252-
// NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{$}}
252+
// NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8{{.*}}
253253
// NOOPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}}
254-
// OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
254+
// OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align 8, !tbaa !{{[0-9]+}}{{.*}}
255255
// OPT-NEXT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
256256
// CHECK-NEXT: call void @_Z6insertIfLj3ELj8EEvR8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noundef nonnull align 4 dereferenceable(96) [[MAT_ADDR]], float noundef [[E]], i32 noundef 2, i32 noundef 5)
257257
// CHECK-NEXT: ret void
@@ -346,11 +346,11 @@ double extract_IntWrapper_idx(double4x4 &m, IntWrapper i, UnsignedWrapper j) {
346346
// CHECK-NEXT: [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
347347
// CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
348348
// CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
349-
// NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
349+
// NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{.*}}
350350
// NOOPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}}
351351
// OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
352352
// OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
353-
// OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
353+
// OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{.*}}
354354
// OPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}}
355355
// CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
356356
// CHECK-NEXT: ret double [[MATEXT]]
@@ -377,8 +377,8 @@ void test_constexpr1(matrix_type<float, 4, 4> &m) {
377377
// OPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
378378
// CHECK-NEXT: [[IM:%.*]] = call noundef <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix)
379379
// CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]]
380-
// NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
381-
// OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
380+
// NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{.*}}
381+
// OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{.*}}
382382
// CHECK-NEXT: store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4
383383
// CHECK-NEXT: ret void
384384

@@ -408,8 +408,8 @@ void test_constexpr2(matrix_type<int, 5, 5> &m) {
408408
// OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
409409
// CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
410410
// CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
411-
// NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
412-
// OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
411+
// NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{.*}}
412+
// OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{.*}}
413413
// CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
414414
// CHECK-NEXT: ret void
415415
//
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple -o - %s -O2 | FileCheck %s
22

3-
// Make sure the call to b() doesn't get optimized out.
3+
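// Make sure the call to b() is eliminated: the load of the reference member is
// known to be non-null, so the check `!&y.x` can be folded away.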
44
extern struct x {char& x,y;}y;
55
int b();
66
int a() { if (!&y.x) b(); }
77

8-
// CHECK: @_Z1bv
8+
// CHECK-NOT: @_Z1bv

clang/test/OpenMP/amdgcn_target_codegen.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ int test_amdgcn_target_tid_threads_simd() {
3939
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
4040
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
4141
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
42-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
42+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !dereferenceable [[META10:![0-9]+]], !align [[META11:![0-9]+]]
4343
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr), ptr [[DYN_PTR]])
4444
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
4545
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -60,7 +60,7 @@ int test_amdgcn_target_tid_threads_simd() {
6060
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
6161
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
6262
// CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
63-
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
63+
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
6464
// CHECK: worker.exit:
6565
// CHECK-NEXT: ret void
6666
// CHECK: for.end:
@@ -83,34 +83,34 @@ int test_amdgcn_target_tid_threads_simd() {
8383
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
8484
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
8585
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
86-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
86+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META9]], !dereferenceable [[META10]], !align [[META11]]
8787
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr), ptr [[DYN_PTR]])
8888
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
8989
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
9090
// CHECK: user_code.entry:
9191
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
9292
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
9393
// CHECK: omp.inner.for.cond:
94-
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
94+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
9595
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
9696
// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
9797
// CHECK: omp.inner.for.body:
98-
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
98+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
9999
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
100100
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
101-
// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
102-
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
101+
// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
102+
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
103103
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
104104
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
105-
// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
105+
// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
106106
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
107107
// CHECK: omp.body.continue:
108108
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
109109
// CHECK: omp.inner.for.inc:
110-
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
110+
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
111111
// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
112-
// CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
113-
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
112+
// CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
113+
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
114114
// CHECK: worker.exit:
115115
// CHECK-NEXT: ret void
116116
// CHECK: omp.inner.for.end:

0 commit comments

Comments
 (0)