Skip to content

Commit a688b84

Browse files
committed
[Clang][CodeGen] Add metadata for load from reference
1 parent d5ff21d commit a688b84

File tree

45 files changed

+8897
-8867
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+8897
-8867
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ C++ Specific Potentially Breaking Changes
7777
7878
To fix this, update libstdc++ to version 14.1.1 or greater.
7979

80+
- Added ``!dereferenceable``, ``!nonnull``, and ``!align`` metadata to loads of references for better codegen.
81+
8082
ABI Changes in This Version
8183
---------------------------
8284
- Fixed Microsoft name mangling of implicitly defined variables used for thread

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2791,9 +2791,37 @@ CodeGenFunction::EmitLoadOfReference(LValue RefLVal,
27912791
llvm::LoadInst *Load =
27922792
Builder.CreateLoad(RefLVal.getAddress(), RefLVal.isVolatile());
27932793
CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo());
2794-
return makeNaturalAddressForPointer(Load, RefLVal.getType()->getPointeeType(),
2795-
CharUnits(), /*ForPointeeType=*/true,
2796-
PointeeBaseInfo, PointeeTBAAInfo);
2794+
QualType PTy = RefLVal.getType()->getPointeeType();
2795+
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
2796+
llvm::LLVMContext &Ctx = getLLVMContext();
2797+
llvm::MDBuilder MDB(Ctx);
2798+
// Emit !dereferenceable metadata
2799+
Load->setMetadata(
2800+
llvm::LLVMContext::MD_dereferenceable,
2801+
llvm::MDNode::get(Ctx,
2802+
MDB.createConstant(llvm::ConstantInt::get(
2803+
Builder.getInt64Ty(),
2804+
CGM.getMinimumObjectSize(PTy).getQuantity()))));
2805+
// Emit !nonnull metadata
2806+
if (CGM.getTypes().getTargetAddressSpace(PTy) == 0 &&
2807+
!CGM.getCodeGenOpts().NullPointerIsValid)
2808+
Load->setMetadata(llvm::LLVMContext::MD_nonnull,
2809+
llvm::MDNode::get(Ctx, {}));
2810+
// Emit !align metadata
2811+
if (PTy->isObjectType()) {
2812+
auto Align =
2813+
CGM.getNaturalPointeeTypeAlignment(RefLVal.getType()).getQuantity();
2814+
if (Align > 1) {
2815+
Load->setMetadata(
2816+
llvm::LLVMContext::MD_align,
2817+
llvm::MDNode::get(Ctx, MDB.createConstant(llvm::ConstantInt::get(
2818+
Builder.getInt64Ty(), Align))));
2819+
}
2820+
}
2821+
}
2822+
return makeNaturalAddressForPointer(Load, PTy, CharUnits(),
2823+
/*ForPointeeType=*/true, PointeeBaseInfo,
2824+
PointeeTBAAInfo);
27972825
}
27982826

27992827
LValue CodeGenFunction::EmitLoadOfReferenceLValue(LValue RefLVal) {

clang/test/OpenMP/amdgcn_target_codegen.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ int test_amdgcn_target_tid_threads_simd() {
3939
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
4040
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
4141
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
42-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
42+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !dereferenceable [[META10:![0-9]+]], !align [[META11:![0-9]+]]
4343
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr), ptr [[DYN_PTR]])
4444
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
4545
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -60,7 +60,7 @@ int test_amdgcn_target_tid_threads_simd() {
6060
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
6161
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
6262
// CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
63-
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
63+
// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
6464
// CHECK: worker.exit:
6565
// CHECK-NEXT: ret void
6666
// CHECK: for.end:
@@ -83,34 +83,34 @@ int test_amdgcn_target_tid_threads_simd() {
8383
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
8484
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
8585
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
86-
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
86+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META9]], !dereferenceable [[META10]], !align [[META11]]
8787
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr), ptr [[DYN_PTR]])
8888
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
8989
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
9090
// CHECK: user_code.entry:
9191
// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
9292
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
9393
// CHECK: omp.inner.for.cond:
94-
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
94+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
9595
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
9696
// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
9797
// CHECK: omp.inner.for.body:
98-
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
98+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
9999
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
100100
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
101-
// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
102-
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
101+
// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
102+
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
103103
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
104104
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
105-
// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
105+
// CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
106106
// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
107107
// CHECK: omp.body.continue:
108108
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
109109
// CHECK: omp.inner.for.inc:
110-
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
110+
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
111111
// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
112-
// CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
113-
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
112+
// CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
113+
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
114114
// CHECK: worker.exit:
115115
// CHECK-NEXT: ret void
116116
// CHECK: omp.inner.for.end:

0 commit comments

Comments
 (0)