Skip to content

Commit 4818623

Browse files
authored
[flang][cuda] Add cuf.shared_memory operation (#131392)
Introduce the `cuf.shared_memory` operation. The operation is used to get the pointer in shared memory for a specific variable. The shared memory is materialized as a global in address space 3, and the different variables point into it at different offsets. Follow-up patches will add lowering and conversion of this operation.
1 parent 471f034 commit 4818623

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,4 +349,33 @@ def cuf_DeviceAddressOp : cuf_Op<"device_address", []> {
349349
let results = (outs fir_ReferenceType:$addr);
350350
}
351351

352+
// cuf.shared_memory: yields a reference ($ptr) for a variable of type
// $in_type. The op is declared Pure. AttrSizedOperandSegments is listed
// because the op has two variadic operand groups ($typeparams and $shape).
def cuf_SharedMemoryOp
353+
: cuf_Op<"shared_memory", [AttrSizedOperandSegments, Pure]> {
354+
let summary = "Get the pointer to the kernel shared memory";
355+
356+
let description = [{
357+
Return the pointer in the shared memory relative to the specified offset.
358+
}];
359+
360+
// $in_type is the only mandatory attribute. $uniq_name, $bindc_name and
// $offset are optional attributes; $typeparams and $shape are variadic
// integer operands.
let arguments = (ins TypeAttr:$in_type, OptionalAttr<StrAttr>:$uniq_name,
361+
OptionalAttr<StrAttr>:$bindc_name, Variadic<AnyIntegerType>:$typeparams,
362+
Variadic<AnyIntegerType>:$shape,
363+
OptionalAttr<I32Attr>:$offset // offset in bytes from the shared memory
364+
// base address.
365+
);
366+
367+
let results = (outs fir_ReferenceType:$ptr);
368+
369+
// Textual form: the type, then the optional parenthesized type parameters,
// then the optional comma-introduced shape operands, the attribute
// dictionary, and finally the result type after `->`.
let assemblyFormat = [{
370+
$in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
371+
(`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
372+
}];
373+
374+
// Custom builder taking the names as llvm::StringRef. No inline body is
// given here, so the definition is written by hand in CUFOps.cpp.
let builders = [OpBuilder<(ins "mlir::Type":$inType,
375+
"llvm::StringRef":$uniqName, "llvm::StringRef":$bindcName,
376+
CArg<"mlir::ValueRange", "{}">:$typeparams,
377+
CArg<"mlir::ValueRange", "{}">:$shape,
378+
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
379+
}
380+
352381
#endif // FORTRAN_DIALECT_CUF_CUF_OPS

flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,25 @@ mlir::LogicalResult cuf::RegisterKernelOp::verify() {
300300
return emitOpError("device function not found");
301301
}
302302

303+
//===----------------------------------------------------------------------===//
304+
// SharedMemoryOp
305+
//===----------------------------------------------------------------------===//
306+
307+
// Custom builder for cuf.shared_memory. Turns the two names into optional
// string attributes and forwards everything to the generated builder.
//   inType      stored as the `in_type` TypeAttr and also passed to
//               wrapAllocaResultType() to obtain the result type.
//   uniqName    value for `uniq_name`; no attribute is set when empty.
//   bindcName   value for `bindc_name`; no attribute is set when empty.
//   typeparams  forwarded unchanged as the type-parameter operands.
//   shape       forwarded unchanged as the shape operands.
//   attributes  extra attributes, added to the operation state last.
// NOTE(review): wrapAllocaResultType is defined outside this view;
// presumably it wraps inType in a reference type - confirm.
void cuf::SharedMemoryOp::build(
308+
mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType,
309+
llvm::StringRef uniqName, llvm::StringRef bindcName,
310+
mlir::ValueRange typeparams, mlir::ValueRange shape,
311+
llvm::ArrayRef<mlir::NamedAttribute> attributes) {
312+
// An empty name maps to a null StringAttr so the optional attribute is
// omitted rather than set to an empty string.
mlir::StringAttr nameAttr =
313+
uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
314+
mlir::StringAttr bindcAttr =
315+
bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
316+
// The trailing null IntegerAttr leaves the optional `offset` attribute
// unset; this builder never assigns an offset.
build(builder, result, wrapAllocaResultType(inType),
317+
mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
318+
mlir::IntegerAttr{});
319+
// Caller-supplied attributes are appended after the op's own attributes.
result.addAttributes(attributes);
320+
}
321+
303322
// Tablegen operators
304323

305324
#define GET_OP_CLASSES

flang/test/Fir/cuf.mlir

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,30 @@ func.func @_QPsub1() {
8686
// CHECK: cuf.alloc
8787
// CHECK: cuf.free
8888

89+
// -----
90+
91+
// Test input: cuf.shared_memory with a shape operand. The variable "r" has
// type !fir.array<?xf32> (unknown extent) and the op takes one index-typed
// shape operand; its result feeds a fir.declare carrying the
// #cuf.cuda<shared> data attribute.
// NOTE(review): the RUN line is outside this view; presumably this is a
// parse/print round-trip test - confirm.
gpu.module @cuda_device_mod {
92+
gpu.func @_QPdynshared() kernel {
93+
%c-1 = arith.constant -1 : index
94+
%6 = cuf.shared_memory !fir.array<?xf32>, %c-1 : index {bindc_name = "r", uniq_name = "_QFdynsharedEr"} -> !fir.ref<!fir.array<?xf32>>
95+
%7 = fir.shape %c-1 : (index) -> !fir.shape<1>
96+
%8 = fir.declare %6(%7) {data_attr = #cuf.cuda<shared>, uniq_name = "_QFdynsharedEr"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xf32>>
97+
gpu.return
98+
}
99+
}
100+
101+
// CHECK: cuf.shared_memory
102+
103+
// -----
104+
105+
// Test input: two cuf.shared_memory ops without operands. Each one yields a
// !fir.ref<i32> for a scalar i32 variable ("a" and "b"), and each result
// feeds a fir.declare carrying the #cuf.cuda<shared> data attribute. The
// enclosing function is marked with cuf.proc_attr = #cuf.cuda_proc<global>.
gpu.module @cuda_device_mod {
106+
gpu.func @_QPshared_static() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
107+
%0 = cuf.shared_memory i32 {bindc_name = "a", uniq_name = "_QFshared_staticEa"} -> !fir.ref<i32>
108+
%1 = fir.declare %0 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFshared_staticEa"} : (!fir.ref<i32>) -> !fir.ref<i32>
109+
%2 = cuf.shared_memory i32 {bindc_name = "b", uniq_name = "_QFshared_staticEb"} -> !fir.ref<i32>
110+
%3 = fir.declare %2 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFshared_staticEb"} : (!fir.ref<i32>) -> !fir.ref<i32>
111+
gpu.return
112+
}
113+
}
114+
115+
// CHECK-COUNT-2: cuf.shared_memory

0 commit comments

Comments
 (0)