Skip to content

Commit 704d732

Browse files
committed
[RFC] IR: Define noalias.addrspace metadata
This is intended to solve a problem with lowering atomics in OpenMP and C++ common to AMDGPU and NVPTX. In OpenCL and CUDA, it is undefined behavior for an atomic instruction to modify an object in thread private memory. In OpenMP, it is defined. Correspondingly, the hardware does not handle this correctly. For AMDGPU, 32-bit atomics work and 64-bit atomics are silently dropped. We therefore need to codegen this by inserting a runtime address space check, performing the private case without atomics, and falling back to issuing the real atomic otherwise. This metadata allows us to avoid this extra check and branch. Handle this by introducing metadata intended to be applied to atomicrmw instructions, indicating that they cannot access the forbidden address space.
1 parent 56b2907 commit 704d732

File tree

6 files changed

+237
-6
lines changed

6 files changed

+237
-6
lines changed

llvm/docs/LangRef.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8021,6 +8021,42 @@ it will contain a list of ids, including the ids of the callsites in the
80218021
full inline sequence, in order from the leaf-most call's id to the outermost
80228022
inlined call.
80238023

8024+
8025+
'``noalias.addrspace``' Metadata
8026+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8027+
8028+
The ``noalias.addrspace`` metadata is used to identify memory
8029+
operations which cannot access objects allocated in a range of address spaces. It is
8030+
attached to memory instructions, including :ref:`atomicrmw
8031+
<i_atomicrmw>`, :ref:`cmpxchg <i_cmpxchg>`, and :ref:`call <i_call>`
8032+
instructions.
8033+
8034+
This follows the same form as :ref:`range metadata <range-metadata>`,
8035+
except the field entries must be of type ``i32``. The values are
8036+
interpreted as the same numeric address spaces that apply to IR values.
8037+
8038+
Example:
8039+
8040+
.. code-block:: llvm
8041+
; %ptr cannot point to an object allocated in addrspace(5)
8042+
%rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0
8043+
8044+
; Undefined behavior. The underlying object is allocated in one of the listed
8045+
; address spaces.
8046+
%alloca = alloca i64, addrspace(5)
8047+
%alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
8048+
%rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0
8049+
8050+
!0 = !{i32 5, i32 6}
8051+
8052+
8053+
This is intended for use on targets with a notion of generic address
8054+
spaces, which at runtime resolve to different physical memory
8055+
spaces. The interpretation of the address space values is target
8056+
specific. The behavior is undefined if the runtime memory address does
8057+
resolve to an object defined in one of the indicated address spaces.
8058+
8059+
80248060
Module Flags Metadata
80258061
=====================
80268062

llvm/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ Changes to the LLVM IR
5454
the standard vector type ``<1 x i64>`` in bitcode upgrade.
5555
* Renamed ``llvm.experimental.stepvector`` intrinsic to ``llvm.stepvector``.
5656

57+
* Introduced ``noalias.addrspace`` metadata.
58+
5759
Changes to LLVM infrastructure
5860
------------------------------
5961

llvm/include/llvm/IR/FixedMetadataKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
5252
LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
5353
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
5454
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
55+
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)

llvm/lib/IR/Verifier.cpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
515515
void visitFunction(const Function &F);
516516
void visitBasicBlock(BasicBlock &BB);
517517
void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
518-
bool IsAbsoluteSymbol);
518+
bool IsAbsoluteSymbol, bool IsAddrSpaceRange);
519519
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
520+
void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty);
520521
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
521522
void visitProfMetadata(Instruction &I, MDNode *MD);
522523
void visitCallStackMetadata(MDNode *MD);
@@ -760,7 +761,7 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
760761
if (const MDNode *AbsoluteSymbol =
761762
GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
762763
verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
763-
true);
764+
true, false);
764765
}
765766
}
766767

@@ -4130,7 +4131,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
41304131
/// Verify !range and !absolute_symbol metadata. These have the same
41314132
/// restrictions, except !absolute_symbol allows the full set.
41324133
void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
4133-
Type *Ty, bool IsAbsoluteSymbol) {
4134+
Type *Ty, bool IsAbsoluteSymbol,
4135+
bool IsAddrSpaceRange) {
41344136
unsigned NumOperands = Range->getNumOperands();
41354137
Check(NumOperands % 2 == 0, "Unfinished range!", Range);
41364138
unsigned NumRanges = NumOperands / 2;
@@ -4147,8 +4149,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41474149

41484150
Check(High->getType() == Low->getType(), "Range pair types must match!",
41494151
&I);
4150-
Check(High->getType() == Ty->getScalarType(),
4151-
"Range types must match instruction type!", &I);
4152+
4153+
if (IsAddrSpaceRange) {
4154+
Check(High->getType()->isIntegerTy(32),
4155+
"noalias.addrspace type must be i32!", &I);
4156+
} else {
4157+
Check(High->getType() == Ty->getScalarType(),
4158+
"Range types must match instruction type!", &I);
4159+
}
41524160

41534161
APInt HighV = High->getValue();
41544162
APInt LowV = Low->getValue();
@@ -4187,7 +4195,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41874195
void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
41884196
assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
41894197
"precondition violation");
4190-
verifyRangeMetadata(I, Range, Ty, false);
4198+
verifyRangeMetadata(I, Range, Ty, false, false);
4199+
}
4200+
4201+
void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range,
4202+
Type *Ty) {
4203+
assert(Range && Range == I.getMetadata(LLVMContext::MD_noalias_addrspace) &&
4204+
"precondition violation");
4205+
verifyRangeMetadata(I, Range, Ty, false, true);
41914206
}
41924207

41934208
void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
@@ -5180,6 +5195,13 @@ void Verifier::visitInstruction(Instruction &I) {
51805195
visitRangeMetadata(I, Range, I.getType());
51815196
}
51825197

5198+
if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) {
5199+
Check(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicRMWInst>(I) ||
5200+
isa<AtomicCmpXchgInst>(I) || isa<CallInst>(I),
5201+
"noalias.addrspace are only for memory operations!", &I);
5202+
visitNoaliasAddrspaceMetadata(I, Range, I.getType());
5203+
}
5204+
51835205
if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
51845206
Check(isa<LoadInst>(I) || isa<StoreInst>(I),
51855207
"invariant.group metadata is only for loads and stores", &I);
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) {
4+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1(
5+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
6+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]]
7+
; CHECK-NEXT: ret i64 [[RET]]
8+
;
9+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0
10+
ret i64 %ret
11+
}
12+
13+
define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) {
14+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2(
15+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
16+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]]
17+
; CHECK-NEXT: ret i64 [[RET]]
18+
;
19+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1
20+
ret i64 %ret
21+
}
22+
23+
define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) {
24+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3(
25+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
26+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]]
27+
; CHECK-NEXT: ret i64 [[RET]]
28+
;
29+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2
30+
ret i64 %ret
31+
}
32+
33+
define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) {
34+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges(
35+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
36+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]]
37+
; CHECK-NEXT: ret i64 [[RET]]
38+
;
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
40+
ret i64 %ret
41+
}
42+
43+
define i64 @load_noalias_addrspace__5_6(ptr %ptr) {
44+
; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6(
45+
; CHECK-SAME: ptr [[PTR:%.*]]) {
46+
; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]]
47+
; CHECK-NEXT: ret i64 [[RET]]
48+
;
49+
%ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4
50+
ret i64 %ret
51+
}
52+
53+
define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) {
54+
; CHECK-LABEL: define void @store_noalias_addrspace__5_6(
55+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
56+
; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]]
57+
; CHECK-NEXT: ret void
58+
;
59+
store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4
60+
ret void
61+
}
62+
63+
define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) {
64+
; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(
65+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) {
66+
; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]]
67+
; CHECK-NEXT: ret { i64, i1 } [[RET]]
68+
;
69+
%ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4
70+
ret { i64, i1 } %ret
71+
}
72+
73+
declare void @foo()
74+
75+
define void @call_noalias_addrspace__5_6(ptr %ptr) {
76+
; CHECK-LABEL: define void @call_noalias_addrspace__5_6(
77+
; CHECK-SAME: ptr [[PTR:%.*]]) {
78+
; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]]
79+
; CHECK-NEXT: ret void
80+
;
81+
call void @foo(), !noalias.addrspace !4
82+
ret void
83+
}
84+
85+
define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) {
86+
; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6(
87+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
88+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]]
89+
; CHECK-NEXT: ret void
90+
;
91+
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4
92+
ret void
93+
}
94+
95+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
96+
97+
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
98+
99+
!0 = !{i32 0, i32 1}
100+
!1 = !{i32 0, i32 2}
101+
!2 = !{i32 1, i32 3}
102+
!3 = !{i32 4, i32 6, i32 10, i32 55}
103+
!4 = !{i32 5, i32 6}
104+
;.
105+
; CHECK: [[META0]] = !{i32 0, i32 1}
106+
; CHECK: [[META1]] = !{i32 0, i32 2}
107+
; CHECK: [[META2]] = !{i32 1, i32 3}
108+
; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55}
109+
; CHECK: [[META4]] = !{i32 5, i32 6}
110+
;.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
2+
3+
; CHECK: It should have at least one range!
4+
; CHECK-NEXT: !0 = !{}
5+
define i64 @noalias_addrspace__empty(ptr %ptr, i64 %val) {
6+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !0
7+
ret i64 %ret
8+
}
9+
10+
; CHECK: Unfinished range!
11+
; CHECK-NEXT: !1 = !{i32 0}
12+
define i64 @noalias_addrspace__single_field(ptr %ptr, i64 %val) {
13+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1
14+
ret i64 %ret
15+
}
16+
17+
; CHECK: Range must not be empty!
18+
; CHECK-NEXT: !2 = !{i32 0, i32 0}
19+
define i64 @noalias_addrspace__0_0(ptr %ptr, i64 %val) {
20+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !2
21+
ret i64 %ret
22+
}
23+
24+
; CHECK: noalias.addrspace type must be i32!
25+
; CHECK-NEXT: %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
26+
define i64 @noalias_addrspace__i64(ptr %ptr, i64 %val) {
27+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !3
28+
ret i64 %ret
29+
}
30+
31+
; CHECK: The lower limit must be an integer!
32+
define i64 @noalias_addrspace__fp(ptr %ptr, i64 %val) {
33+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !4
34+
ret i64 %ret
35+
}
36+
37+
; CHECK: The lower limit must be an integer!
38+
define i64 @noalias_addrspace__ptr(ptr %ptr, i64 %val) {
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !5
40+
ret i64 %ret
41+
}
42+
43+
; CHECK: The lower limit must be an integer!
44+
define i64 @noalias_addrspace__nonconstant(ptr %ptr, i64 %val) {
45+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !6
46+
ret i64 %ret
47+
}
48+
49+
@gv0 = global i32 0
50+
@gv1 = global i32 1
51+
52+
!0 = !{}
53+
!1 = !{i32 0}
54+
!2 = !{i32 0, i32 0}
55+
!3 = !{i64 1, i64 5}
56+
!4 = !{float 0.0, float 2.0}
57+
!5 = !{ptr null, ptr addrspace(1) null}
58+
!6 = !{i32 ptrtoint (ptr @gv0 to i32), i32 ptrtoint (ptr @gv1 to i32) }
59+
60+

0 commit comments

Comments
 (0)