Skip to content

Commit 9e8aa6a

Browse files
Implemented GPU runner (#362)
1 parent 50a9bfc commit 9e8aa6a

File tree

15 files changed

+275
-52
lines changed

15 files changed

+275
-52
lines changed

cmake/functions.cmake

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,36 @@ function(gc_add_mlir_dialect_library name)
117117
if(GcInterface IN_LIST ARGN)
118118
target_link_libraries(obj.${name} PUBLIC GcInterface)
119119
endif()
120-
endfunction()
120+
endfunction()
121+
122+
# Declares an MLIR-based executable tool.
#
# All arguments are forwarded unchanged to add_mlir_tool(); the first one is
# the target name. LLVM_LINK_COMPONENTS and MLIR_LINK_COMPONENTS may be set by
# the caller beforehand to override the defaults below. This is a macro, so
# the defaults are set in the caller's variable scope.
macro(gc_add_mlir_tool name)
  # Default LLVM components; the dependency list is copied from upstream
  # mlir/tools/mlir-cpu-runner/CMakeLists.txt.
  if(NOT DEFINED LLVM_LINK_COMPONENTS)
    set(LLVM_LINK_COMPONENTS Core Support nativecodegen native)
  endif()

  # Default MLIR libraries, used only when the caller did not provide a list.
  if(NOT DEFINED MLIR_LINK_COMPONENTS)
    gc_set_mlir_link_components(MLIR_LINK_COMPONENTS
      MLIRAnalysis
      MLIRBuiltinToLLVMIRTranslation
      MLIRExecutionEngine
      MLIRIR
      MLIRJitRunner
      MLIRLLVMDialect
      MLIRLLVMToLLVMIRTranslation
      MLIRToLLVMIRTranslationRegistration
      MLIRParser
      MLIRTargetLLVMIRExport
      MLIRSupport
    )
  endif()

  add_mlir_tool(${ARGV})
  # LLVM_LINK_COMPONENTS is processed by the LLVM CMake code in
  # add_llvm_executable, so only the MLIR libraries are linked explicitly.
  target_link_libraries(${name} PRIVATE GcInterface ${MLIR_LINK_COMPONENTS})
  llvm_update_compile_flags(${name})
  # Record the tool in the global GC_TOOLS property.
  set_property(GLOBAL APPEND PROPERTY GC_TOOLS ${name})
endmacro()

include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ struct OclModule {
240240

241241
struct OclModuleBuilderOpts {
242242
StringRef funcName = {};
243+
bool printIr = false;
243244
bool enableObjectDump = false;
244245
ArrayRef<StringRef> sharedLibPaths = {};
245246
void (*pipeline)(OpPassManager &) = nullptr;
@@ -267,6 +268,7 @@ struct OclModuleBuilder {
267268

268269
private:
269270
ModuleOp mlirModule;
271+
const bool printIr;
270272
const bool enableObjectDump;
271273
const ArrayRef<StringRef> sharedLibPaths;
272274
void (*const pipeline)(OpPassManager &);

lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,8 @@ ArrayRef<Type> getArgTypes(const StringRef &funcName, ModuleOp &mod) {
749749

750750
OclModuleBuilder::OclModuleBuilder(ModuleOp module,
751751
const OclModuleBuilderOpts &opts)
752-
: mlirModule(module), enableObjectDump(opts.enableObjectDump),
752+
: mlirModule(module), printIr(opts.printIr),
753+
enableObjectDump(opts.enableObjectDump),
753754
sharedLibPaths(opts.sharedLibPaths),
754755
pipeline(opts.pipeline
755756
? opts.pipeline
@@ -799,6 +800,10 @@ OclModuleBuilder::build(const OclRuntime::Ext &ext) {
799800

800801
auto staticMain = createStaticMain(mod, funcName, argTypes);
801802

803+
if (printIr) {
804+
mod.dump();
805+
}
806+
802807
ExecutionEngineOptions opts;
803808
opts.jitCodeGenOptLevel = llvm::CodeGenOptLevel::Aggressive;
804809
opts.enableObjectDump = enableObjectDump;

src/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
################################################################################
2+
# Copyright (C) 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing,
11+
# software distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions
14+
# and limitations under the License.
15+
# SPDX-License-Identifier: Apache-2.0
16+
################################################################################
17+
118
# Component directories, added in this order.
foreach(gc_src_subdir dnnl gc-cpu-runner gc-gpu-runner gc-opt)
  add_subdirectory(${gc_src_subdir})
endforeach()

src/gc-cpu-runner/CMakeLists.txt

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -29,38 +29,8 @@ if(GC_DEV_LINK_LLVM_DYLIB)
2929
MLIRExecutionEngineShared
3030
MLIRJitRunner
3131
)
32-
else()
33-
# the dependency list copied from mlir/tools/mlir-cpu-runner/CMakeLists.txt of upstream
34-
set(LLVM_LINK_COMPONENTS
35-
Core
36-
Support
37-
nativecodegen
38-
native
39-
)
40-
set(MLIR_LINK_COMPONENTS
41-
MLIRAnalysis
42-
MLIRBuiltinToLLVMIRTranslation
43-
MLIRExecutionEngine
44-
MLIRIR
45-
MLIRJitRunner
46-
MLIRLLVMDialect
47-
MLIRLLVMToLLVMIRTranslation
48-
MLIRToLLVMIRTranslationRegistration
49-
MLIRParser
50-
MLIRTargetLLVMIRExport
51-
MLIRSupport
52-
)
5332
endif()
5433

55-
#LLVM_LINK_COMPONENTS is processed by LLVM cmake in add_llvm_executable
56-
set(gc_cpu_runner_libs
57-
${MLIR_LINK_COMPONENTS}
58-
GcCpuRuntime)
59-
add_mlir_tool(gc-cpu-runner
60-
gc-cpu-runner.cpp
61-
62-
)
63-
llvm_update_compile_flags(gc-cpu-runner)
64-
65-
target_link_libraries(gc-cpu-runner PRIVATE GcInterface ${gc_cpu_runner_libs})
34+
gc_add_mlir_tool(gc-cpu-runner gc-cpu-runner.cpp)
35+
target_link_libraries(gc-cpu-runner PRIVATE GcCpuRuntime)
6636
mlir_check_all_link_libraries(gc-cpu-runner)

src/gc-gpu-runner/CMakeLists.txt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
################################################################################
2+
# Copyright (C) 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing,
11+
# software distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions
14+
# and limitations under the License.
15+
# SPDX-License-Identifier: Apache-2.0
16+
################################################################################
17+
18+
# The GPU runner is built only when both GC_ENABLE_TOOLS and GC_ENABLE_IMEX
# are enabled; otherwise this directory contributes nothing.
if(NOT (GC_ENABLE_TOOLS AND GC_ENABLE_IMEX))
  message(STATUS "Gpu runner is not enabled.")
  return()
endif()

# The common MLIR/LLVM dependencies come from gc_add_mlir_tool(); only the
# runner-specific libraries are linked here.
gc_add_mlir_tool(gc-gpu-runner GpuRunner.cpp)
target_link_libraries(gc-gpu-runner PRIVATE GcJitWrapper GcGpuOclRuntime)
mlir_check_all_link_libraries(gc-gpu-runner)

src/gc-gpu-runner/GpuRunner.cpp

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* Copyright (C) 2024 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing,
11+
* software distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions
14+
* and limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
*/
18+
19+
#include "gc/ExecutionEngine/Driver/Driver.h"
20+
#include "gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h"
21+
#include "gc/Transforms/Passes.h"
22+
#include "gc/Utils/Error.h"
23+
24+
#include "mlir/Dialect/Arith/IR/Arith.h"
25+
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
26+
#include "mlir/ExecutionEngine/JitRunner.h"
27+
#include "mlir/ExecutionEngine/OptUtils.h"
28+
#include "mlir/IR/MLIRContext.h"
29+
#include "mlir/Support/FileUtilities.h"
30+
#include "mlir/Tools/ParseUtilities.h"
31+
#include "mlir/Transforms/Passes.h"
32+
33+
#include "llvm/Support/CommandLine.h"
34+
#include "llvm/Support/InitLLVM.h"
35+
#include "llvm/Support/SourceMgr.h"
36+
37+
using namespace mlir;
38+
39+
namespace {
40+
struct Options {
41+
llvm::cl::OptionCategory runnerCategory{"GPU runner options"};
42+
llvm::cl::opt<std::string> inputFilename{
43+
llvm::cl::Positional, llvm::cl::desc("<input file>"), llvm::cl::init("-"),
44+
llvm::cl::cat(runnerCategory)};
45+
llvm::cl::opt<std::string> mainFuncName{
46+
"e",
47+
llvm::cl::desc("The function to be executed. If not specified, the "
48+
"first matching function in the module to be used."),
49+
llvm::cl::value_desc("function name"), llvm::cl::cat(runnerCategory)};
50+
llvm::cl::opt<bool> skipPipeline{
51+
"skip-pipeline",
52+
llvm::cl::desc("Skip the GPU pipeline. It's expected, that the input is "
53+
"already lowered with 'gc-op --gc-gpu-pipeline'."),
54+
llvm::cl::init(false), llvm::cl::cat(runnerCategory)};
55+
llvm::cl::list<std::string> sharedLibs{
56+
"shared-libs",
57+
llvm::cl::desc("Comma separated library paths to link dynamically."),
58+
llvm::cl::MiscFlags::CommaSeparated, llvm::cl::desc("<lib1,lib2,...>"),
59+
llvm::cl::cat(runnerCategory)};
60+
llvm::cl::opt<bool> printIr{
61+
"print-ir",
62+
llvm::cl::desc("Print the resulting IR before the execution."),
63+
llvm::cl::init(false), llvm::cl::cat(runnerCategory)};
64+
llvm::cl::opt<std::string> objDumpFile{
65+
"obj-dump-file",
66+
llvm::cl::desc("Dump the compiled object to the specified file."),
67+
llvm::cl::value_desc("file path"), llvm::cl::cat(runnerCategory)};
68+
};
69+
} // namespace
70+
71+
/// Resolves and validates the function to be executed.
///
/// When no function name was given on the command line, the module body is
/// scanned and the first non-external, public LLVM::LLVMFuncOp or
/// func::FuncOp with a matching signature is stored into opts.mainFuncName.
/// Otherwise the named function is looked up and its signature is checked.
///
/// The expected signature depends on --skip-pipeline:
///  - set:     (!llvm.ptr, !llvm.ptr, i64)
///  - not set: no arguments
///
/// Failures are reported with gcReportErr().
/// NOTE(review): the control flow below relies on gcReportErr() not
/// returning - confirm against gc/Utils/Error.h.
void findFunc(Options &opts, ModuleOp mod) {
  using SignatureMatcher = bool (*)(ArrayRef<Type>, ModuleOp &);

  // Signature expected from an already lowered module.
  const SignatureMatcher matchesLowered = [](ArrayRef<Type> types,
                                             ModuleOp &module) {
    if (types.size() != 3)
      return false;
    auto *context = module.getContext();
    auto ptrTy = LLVM::LLVMPointerType::get(context);
    return types[0] == ptrTy && types[1] == ptrTy &&
           types[2] == IntegerType::get(context, 64);
  };
  // Signature expected when the GPU pipeline is applied by the runner.
  const SignatureMatcher matchesNoArgs = [](ArrayRef<Type> types, ModuleOp &) {
    return types.empty();
  };
  const SignatureMatcher matcher =
      opts.skipPipeline ? matchesLowered : matchesNoArgs;

  if (opts.mainFuncName.empty()) {
    // Adopts the candidate as the main function when it qualifies. The
    // candidate is null when the dyn_cast below did not succeed.
    auto tryAdopt = [&](auto candidate) {
      const bool qualifies = candidate && !candidate.isExternal() &&
                             candidate.isPublic() &&
                             matcher(candidate.getArgumentTypes(), mod);
      if (qualifies)
        opts.mainFuncName = candidate.getName().str();
      return qualifies;
    };

    for (auto &op : *mod.getBody()) {
      if (tryAdopt(dyn_cast<LLVM::LLVMFuncOp>(op)))
        return;
      if (tryAdopt(dyn_cast<func::FuncOp>(op)))
        return;
    }
    gcReportErr("No matching function found.");
  }

  // A name is known at this point - look the function up in either dialect.
  ArrayRef<Type> argTypes;
  if (auto llvmFn = mod.lookupSymbol<LLVM::LLVMFuncOp>(opts.mainFuncName)) {
    argTypes = llvmFn.getArgumentTypes();
  } else if (auto fn = mod.lookupSymbol<func::FuncOp>(opts.mainFuncName)) {
    argTypes = fn.getArgumentTypes();
  } else {
    gcReportErr("The function '", opts.mainFuncName.c_str(), "' not found.");
  }

  if (matcher(argTypes, mod))
    return;

  if (opts.skipPipeline) {
    gcReportErr("The function '", opts.mainFuncName.c_str(),
                "' signature does not match (!llvm.ptr, !llvm.ptr, i64).");
  }
  gcReportErr("The function '", opts.mainFuncName.c_str(),
              "' must have no arguments.");
}
124+
125+
int main(int argc, char **argv) {
126+
Options opts;
127+
llvm::cl::ParseCommandLineOptions(argc, argv, "GraphCompiler GPU runner\n");
128+
129+
std::string errMsg;
130+
auto file = openInputFile(opts.inputFilename, &errMsg);
131+
if (!file) {
132+
gcReportErr("Failed to read input IR: ", errMsg.c_str());
133+
}
134+
135+
auto srcMgr = std::make_shared<llvm::SourceMgr>();
136+
srcMgr->AddNewSourceBuffer(std::move(file), SMLoc());
137+
MLIRContext mlirCtx{gc::initCompilerAndGetDialects()};
138+
auto mlirMod = parseSourceFile<ModuleOp>(srcMgr, {&mlirCtx});
139+
findFunc(opts, *mlirMod);
140+
141+
gc::gpu::OclModuleBuilderOpts builderOpts;
142+
SmallVector<StringRef, 4> sharedLibs(opts.sharedLibs.begin(),
143+
opts.sharedLibs.end());
144+
builderOpts.funcName = opts.mainFuncName;
145+
builderOpts.printIr = opts.printIr;
146+
builderOpts.enableObjectDump = !opts.objDumpFile.getValue().empty();
147+
builderOpts.sharedLibPaths = sharedLibs;
148+
builderOpts.pipeline =
149+
opts.skipPipeline ? [](OpPassManager &) {} : [](OpPassManager &pm) {
150+
gc::GPUPipelineOptions pipelineOpts;
151+
pipelineOpts.isUsmArgs = false;
152+
pipelineOpts.callFinish = true;
153+
populateGPUPipeline(pm, pipelineOpts);
154+
};
155+
156+
gc::gpu::OclModuleBuilder builder{mlirMod, builderOpts};
157+
auto runtime = gcGetOrReport(gc::gpu::OclRuntime::get());
158+
auto oclMod = gcGetOrReport(builder.build(runtime));
159+
assert(oclMod->isStatic);
160+
161+
if (!opts.objDumpFile.getValue().empty()) {
162+
gcLogD("Dumping the compiled object to ", opts.objDumpFile.getValue());
163+
oclMod->dumpToObjectFile(opts.objDumpFile.getValue());
164+
}
165+
166+
auto queue = gcGetOrReport(runtime.createQueue());
167+
gc::gpu::OclContext ctx{runtime, queue};
168+
gc::gpu::StaticExecutor<0> exec{oclMod};
169+
gcLogD("Executing function ", opts.mainFuncName.c_str(), "()");
170+
exec(ctx);
171+
gcGetOrReport(ctx.finish());
172+
gcGetOrReport(runtime.releaseQueue(queue));
173+
return 0;
174+
}

test/mlir/test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ set(GC_OPT_TEST_DEPENDS
3232

3333
if(GC_ENABLE_IMEX)
3434
include(imex)
35-
list(APPEND GC_OPT_TEST_DEPENDS GcOpenclRuntime)
35+
list(APPEND GC_OPT_TEST_DEPENDS gc-gpu-runner)
3636
endif()
3737

3838
if(GC_ENABLE_BINDINGS_PYTHON)

test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose.mlir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
2-
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
3-
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
1+
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s
2+
43
module{
54

65
func.func @linalg_matmul(%arg0: tensor<128x256xf16>,

test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_128x64_transpose_sep.mlir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
2-
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
3-
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
1+
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s
2+
43
module{
54

65
func.func @linalg_matmul(%arg0: tensor<128x256xf16>,

test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
2-
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
3-
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
1+
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s
2+
43
module{
54

65
func.func @linalg_matmul(%arg0: tensor<64x64xf16>,

test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096.mlir

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
2-
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
3-
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
1+
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s
42

53
module {
64
func.func @linalg_mlp(%arg0: tensor<32x4096xf16>, %arg1: tensor<4096x4096xf16>, %arg2 : tensor<32x4096xf16>,

test/mlir/test/gc/gpu-runner/XeGPU/f16_mlp_32x4096x4096x4096_transpose.mlir

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
2-
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
3-
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
1+
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s
42

53
module {
64
func.func @linalg_mlp(%arg0: tensor<32x4096xf16>, %arg1: tensor<4096x4096xf16>, %arg2 : tensor<32x4096xf16>,

0 commit comments

Comments
 (0)