|
| 1 | +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s |
| 2 | + |
| 3 | +// Checking the translation of the `gpu.binary` & `gpu.launch_func` ops. |
| 4 | +module attributes {gpu.container_module} { |
| 5 | + // CHECK: [[ARGS_TY:%.*]] = type { i32, i32 } |
| 6 | + // CHECK: @kernel_module_bin_cst = internal constant [4 x i8] c"BLOB", align 8 |
| 7 | + // CHECK: @kernel_module_kernel_kernel_name = private unnamed_addr constant [7 x i8] c"kernel\00", align 1 |
| 8 | + gpu.binary @kernel_module [#gpu.object<#nvvm.target, "BLOB">] // Single object; its "BLOB" payload is the expected binary constant. |
| 9 | + llvm.func @foo() { // The launch in this function passes no stream operand; the expected IR creates, synchronizes and destroys one around the kernel launch. |
| 10 | + // CHECK: [[ARGS:%.*]] = alloca %{{.*}}, align 8 |
| 11 | + // CHECK: [[ARGS_ARRAY:%.*]] = alloca ptr, i64 2, align 8 |
| 12 | + // CHECK: [[ARG0:%.*]] = getelementptr inbounds [[ARGS_TY]], ptr [[ARGS]], i32 0, i32 0 |
| 13 | + // CHECK: store i32 32, ptr [[ARG0]], align 4 |
| 14 | + // CHECK: %{{.*}} = getelementptr ptr, ptr [[ARGS_ARRAY]], i32 0 |
| 15 | + // CHECK: store ptr [[ARG0]], ptr %{{.*}}, align 8 |
| 16 | + // CHECK: [[ARG1:%.*]] = getelementptr inbounds [[ARGS_TY]], ptr [[ARGS]], i32 0, i32 1 |
| 17 | + // CHECK: store i32 32, ptr [[ARG1]], align 4 |
| 18 | + // CHECK: %{{.*}} = getelementptr ptr, ptr [[ARGS_ARRAY]], i32 1 |
| 19 | + // CHECK: store ptr [[ARG1]], ptr %{{.*}}, align 8 |
| 20 | + // CHECK: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst) |
| 21 | + // CHECK: [[FUNC:%.*]] = call ptr @mgpuModuleGetFunction(ptr [[MODULE]], ptr @kernel_module_kernel_kernel_name) |
| 22 | + // CHECK: [[STREAM:%.*]] = call ptr @mgpuStreamCreate() |
| 23 | + // CHECK: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 256, ptr [[STREAM]], ptr [[ARGS_ARRAY]], ptr null) |
| 24 | + // CHECK: call void @mgpuStreamSynchronize(ptr [[STREAM]]) |
| 25 | + // CHECK: call void @mgpuStreamDestroy(ptr [[STREAM]]) |
| 26 | + // CHECK: call void @mgpuModuleUnload(ptr [[MODULE]]) |
| 27 | + %0 = llvm.mlir.constant(8 : index) : i64 // Used for all three block counts and all three thread counts of the launch. |
| 28 | + %1 = llvm.mlir.constant(32 : i32) : i32 // Passed twice as the kernel arguments. |
| 29 | + %2 = llvm.mlir.constant(256 : i32) : i32 // Dynamic shared memory size of the launch. |
| 30 | + gpu.launch_func @kernel_module::@kernel blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %2 args(%1 : i32, %1 : i32) |
| 31 | + llvm.return |
| 32 | + } |
| 33 | +} |
| 34 | + |
| 35 | +// ----- |
| 36 | + |
| 37 | +// Checking the correct selection of the second object using an index as a selector. |
| 38 | +module { |
| 39 | + // CHECK: @kernel_module_bin_cst = internal constant [1 x i8] c"1", align 8 |
| 40 | + gpu.binary @kernel_module <#gpu.select_object<1>> [#gpu.object<#nvvm.target, "0">, #gpu.object<#nvvm.target, "1">] // Selector index 1 picks the object whose payload is "1". |
| 41 | +} |
| 42 | + |
| 43 | +// ----- |
| 44 | + |
| 45 | +// Checking the correct selection of the second object using a target as a selector. |
| 46 | +module { |
| 47 | + // CHECK: @kernel_module_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8 |
| 48 | + gpu.binary @kernel_module <#gpu.select_object<#rocdl.target>> [#gpu.object<#nvvm.target, "NVPTX">, #gpu.object<#rocdl.target, "AMDGPU">] // The #rocdl.target selector picks the object whose payload is "AMDGPU". |
| 49 | +} |
| 50 | + |
| 51 | +// ----- |
| 52 | + |
| 53 | +// Checking the translation of `gpu.launch_func` with an async dependency. |
| 54 | +module attributes {gpu.container_module} { |
| 55 | + // CHECK: @kernel_module_bin_cst = internal constant [4 x i8] c"BLOB", align 8 |
| 56 | + gpu.binary @kernel_module [#gpu.object<#rocdl.target, "BLOB">] |
| 57 | + llvm.func @foo() { |
| 58 | + %0 = llvm.mlir.constant(8 : index) : i64 // Used for all three block counts and all three thread counts of the launch. |
| 59 | + // CHECK: = call ptr @mgpuStreamCreate() |
| 60 | + // CHECK-NEXT: = alloca {{.*}}, align 8 |
| 61 | + // CHECK-NEXT: [[ARGS:%.*]] = alloca ptr, i64 0, align 8 |
| 62 | + // CHECK-NEXT: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst) |
| 63 | + // CHECK-NEXT: [[FUNC:%.*]] = call ptr @mgpuModuleGetFunction(ptr [[MODULE]], ptr @kernel_module_kernel_kernel_name) |
| 64 | + // CHECK-NEXT: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 0, ptr {{.*}}, ptr [[ARGS]], ptr null) |
| 65 | + // CHECK-NEXT: call void @mgpuModuleUnload(ptr [[MODULE]]) |
| 66 | + // CHECK-NEXT: call void @mgpuStreamSynchronize(ptr %{{.*}}) |
| 67 | + // CHECK-NEXT: call void @mgpuStreamDestroy(ptr %{{.*}}) |
| 68 | + %1 = llvm.call @mgpuStreamCreate() : () -> !llvm.ptr // Stream created explicitly here and handed to the launch below. |
| 69 | + gpu.launch_func <%1 : !llvm.ptr> @kernel_module::@kernel blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 |
| 70 | + llvm.call @mgpuStreamSynchronize(%1) : (!llvm.ptr) -> () // Explicit synchronization; the expected IR places it after the module unload. |
| 71 | + llvm.call @mgpuStreamDestroy(%1) : (!llvm.ptr) -> () |
| 72 | + llvm.return |
| 73 | + } |
| 74 | + llvm.func @mgpuStreamCreate() -> !llvm.ptr // Declarations of the stream functions called explicitly in @foo. |
| 75 | + llvm.func @mgpuStreamSynchronize(!llvm.ptr) |
| 76 | + llvm.func @mgpuStreamDestroy(!llvm.ptr) |
| 77 | +} |
0 commit comments