#ifndef AMDGPU
#define AMDGPU

+include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ViewLikeInterface.td"
include "mlir/IR/EnumAttr.td"
+include "mlir/IR/Properties.td"
include "mlir/IR/OpBase.td"

def AMDGPU_Dialect : Dialect {
@@ -32,6 +35,45 @@ def AMDGPU_Dialect : Dialect {
  let useDefaultAttributePrinterParser = 1;
}

+//===----------------------------------------------------------------------===//
+// AMDGPU general attribute definitions
+//===----------------------------------------------------------------------===//
+
+def AMDGPU_AddressSpace : I32EnumAttr<"AddressSpace",
+    "AMDGPU-specific address spaces",
+    [
+      I32EnumAttrCase<"FatRawBuffer", 0, "fat_raw_buffer">,
+      I32EnumAttrCase<"BufferRsrc", 1, "buffer_rsrc">,
+      I32EnumAttrCase<"FatStructuredBuffer", 2, "fat_structured_buffer">,
+    ]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::amdgpu";
+}
+
+def AMDGPU_AddressSpaceAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_AddressSpace,
+    "address_space"> {
+  let description = [{
+    AMDGPU-specific memory spaces that may not have exact analogues on other
+    GPU targets or backends.
+
+    - `fat_raw_buffer` is the memory space used when a memref is stored as a
+      "buffer fat pointer" - that is, a buffer resource (which is set up to
+      use raw byte-level indexing) along with its offset. The AMDGPU backend
+      implements `ptr addrspace(7)` to represent these fat pointers so that
+      buffer resources (which allow advanced features like bounds checking or
+      cache swizzling) can be used like ordinary LLVM pointers or memrefs.
+      See also the `fat_raw_buffer_cast` operation and the example type below.
+    - `buffer_rsrc` is the memory space for `ptr addrspace(8)`, representing
+      a buffer resource. It should not be used for memrefs, since it does not
+      support indexing.
+    - `fat_structured_buffer` represents `ptr addrspace(9)`, a buffer
+      resource that carries both an index and an offset field, which are used
+      for the complex structured indexing primarily seen in graphics
+      applications. This is also incompatible with the simple indexing model
+      supported by memref.
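+
+    For example (an illustrative type; see the `fat_raw_buffer_cast`
+    operation for how such values are created), a memref accessed through a
+    buffer fat pointer is written:
+
+    ```mlir
+    memref<64xf32, #amdgpu.address_space<fat_raw_buffer>>
+    ```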
+  }];
+  let assemblyFormat = "`<` $value `>`";
+}
+
//===----------------------------------------------------------------------===//
// AMDGPU Op definitions
//===----------------------------------------------------------------------===//
@@ -118,6 +160,69 @@ def AMDGPU_PackedStochRoundFp8Op :
  let hasVerifier = 1;
}

+def AMDGPU_FatRawBufferCastOp :
+    AMDGPU_Op<"fat_raw_buffer_cast",
+      [Pure,
+       DeclareOpInterfaceMethods<InferTypeOpInterface>,
+       ViewLikeOpInterface, AttrSizedOperandSegments]>,
+    Arguments<(ins AnyMemRef:$source,
+      Optional<I32>:$validBytes,
+      Optional<I<14>>:$cacheSwizzleStride,
+      DefaultValuedProp<BoolProp, "true">:$boundsCheck,
+      UnitProp:$resetOffset)>,
+    Results<(outs AnyMemRef:$result)> {
+  let summary = "Create a raw buffer fat pointer that matches `memref`";
+  let description = [{
+    Wraps the memory pointed to by `source` as a raw buffer fat pointer, or,
+    in LLVM terms, a `ptr addrspace(7)`, returning a memref that has the same
+    sizes and layout but the `#amdgpu.address_space<fat_raw_buffer>`
+    address space.
+
+    This memref can be used with standard memref operations like
+    `memref.load`, `memref.store`, and `memref.atomicrmw`, which will be
+    lowered to the relevant buffer intrinsics. (`vector.masked_load/store`
+    will work once there is backend support for lowering them, and then this
+    document will be updated.)
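+
+    For example (a minimal sketch, assuming `%src` and the `index` value `%i`
+    are defined elsewhere):
+
+    ```mlir
+    %fat = amdgpu.fat_raw_buffer_cast %src
+      : memref<64xf32> to memref<64xf32, #amdgpu.address_space<fat_raw_buffer>>
+    %v = memref.load %fat[%i]
+      : memref<64xf32, #amdgpu.address_space<fat_raw_buffer>>
+    ```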
+
+    If `validBytes` is given, it is the number of bytes that will be valid as
+    an offset into `result`. If it is not provided, this will be inferred
+    from the size of the memref during lowering. This size is
+    max_{d = 0, ..., rank(source) - 1} (sizes[d] * strides[d])
+    * sizeof(element type).
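+    For example (an illustrative calculation), for a contiguous
+    `memref<16x16xf32>`, the sizes are [16, 16], the strides are [16, 1], and
+    `f32` is 4 bytes wide, so the inferred size is
+    max(16 * 16, 16 * 1) * 4 = 1024 bytes.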
+
+    The flags of the buffer descriptor will be set up to enable raw usage -
+    for example, stride = 0, add_tid = 0, and so on. The `boundsCheck`
+    property determines whether bounds checking is enabled (on architectures
+    where this can be controlled - that is, on RDNA chips).
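+
+    For example (a minimal sketch, assuming `%src` is defined elsewhere and
+    that boolean properties print as `true`/`false`), bounds checking can be
+    disabled like so:
+
+    ```mlir
+    %fat = amdgpu.fat_raw_buffer_cast %src boundsCheck(false)
+      : memref<64xf32> to memref<64xf32, #amdgpu.address_space<fat_raw_buffer>>
+    ```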
+
+    If `cacheSwizzleStride` is provided, L1 cache swizzling will be enabled
+    on architectures that support it. This swizzling, unlike the main
+    swizzling mode (whose usage makes a buffer non-raw), does not affect
+    index calculation, but does affect cache behavior. Mixing accesses
+    between cache-swizzled raw buffers and other forms of memory access,
+    such as ordinary pointer loads or unswizzled buffer pointers, can cause
+    incorrect behavior and must be avoided.
+
+    This operation preserves the sizes, strides, and offset of the input
+    memref - they'll be added in by `memref.load` later. However, if
+    `resetOffset` is set, that offset will instead be added to the base
+    pointer. If the value of the memref's offset is not uniform (that is, if
+    it varies with the lane/thread ID), this will lead to substantially
+    decreased performance due to the need for a waterfall loop on the base
+    address of the buffer resource.
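+
+    For example (a hedged sketch combining the optional arguments; `%n : i32`
+    and `%s : i14` match the declared argument types and are assumed to be
+    defined elsewhere):
+
+    ```mlir
+    %fat = amdgpu.fat_raw_buffer_cast %src validBytes(%n)
+        cacheSwizzleStride(%s) resetOffset
+      : memref<?xi8> to memref<?xi8, #amdgpu.address_space<fat_raw_buffer>>
+    ```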
+  }];
+
+  let extraClassDeclaration = [{
+    Value getViewSource() { return getSource(); }
+  }];
+
+  let assemblyFormat = [{
+    $source oilist (`validBytes` `(` $validBytes `)`
+                   | `cacheSwizzleStride` `(` $cacheSwizzleStride `)`
+                   | `boundsCheck` `(` $boundsCheck `)`
+                   | `resetOffset` $resetOffset )
+    attr-dict `:` type($source) `to` type($result)
+  }];
+
+  let hasVerifier = 1;
+}
+
/// Raw buffer load
def AMDGPU_RawBufferLoadOp :
    AMDGPU_Op<"raw_buffer_load", [AllElementTypesMatch<["value", "memref"]>,