Skip to content

Commit a3cd2ee

Browse files
[mlir][nvgpu] Add a nvgpu.rewrite_copy_as_tma transform operation.
This revision adds support for directly lowering a linalg.copy on buffers between global and shared memory to TMA async load and synchronization operations. It uses the recently introduced Hopper NVVM and NVGPU abstractions to connect things end to end. Differential Revision: https://reviews.llvm.org/D157087
1 parent b6d994d commit a3cd2ee

File tree

5 files changed

+533
-34
lines changed

5 files changed

+533
-34
lines changed

mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,33 @@ def RewriteMatmulAsMmaSyncOp :
164164
}];
165165
}
166166

167+
//===----------------------------------------------------------------------===//
168+
// RewriteCopyAsTmaOp
169+
//===----------------------------------------------------------------------===//
170+
171+
// Transform dialect operation `nvgpu.rewrite_copy_as_tma`.
//
// Consumes a single transform handle (`$target`) and produces no results.
// Only the C++ `apply` entry point is declared here (via
// `extraClassDeclaration`); its definition is not part of this file.
//
// NOTE(review): `TransformEachOpTrait` is listed below, yet the op declares a
// whole-op `apply(...)` rather than a per-payload `applyToOne(...)` — confirm
// that the trait is intended.
def RewriteCopyAsTmaOp :
172+
Op<Transform_Dialect, "nvgpu.rewrite_copy_as_tma",
173+
[FunctionalStyleTransformOpTrait,
174+
MemoryEffectsOpInterface,
175+
TransformEachOpTrait,
176+
TransformOpInterface,
177+
ReportTrackingListenerFailuresOpTrait]> {
178+
let description = [{
179+
Rewrite a copy operation on memref to tma operations that transit through
180+
shared memory.
181+
}];
182+
183+
// Single operand: a handle of any type implementing
// TransformHandleTypeInterface. The op defines no results.
let arguments = (ins TransformHandleTypeInterface:$target);
184+
let results = (outs);
185+
186+
// Custom syntax: the target operand, the attribute dictionary, then `:`
// followed by the functional type of the operands and results.
let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) ";
187+
188+
// Declares the `apply` member on the generated op class; the body is expected
// to be provided in a C++ source file not shown here.
let extraClassDeclaration = [{
189+
::mlir::DiagnosedSilenceableFailure apply(
190+
::mlir::transform::TransformRewriter &rewriter,
191+
::mlir::transform::TransformResults &transformResults,
192+
::mlir::transform::TransformState &state);
193+
}];
194+
}
195+
167196
#endif // NVGPU_TRANSFORM_OPS

0 commit comments

Comments
 (0)