Skip to content

Commit 5beabe6

Browse files
committed
[mlir][target][nvvm] Perf by stage and store into properties
1 parent 9cd83d6 commit 5beabe6

File tree

3 files changed

+97
-6
lines changed

3 files changed

+97
-6
lines changed

mlir/lib/Target/LLVM/NVVM/Target.cpp

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
#include <cstdint>
4141
#include <cstdlib>
42+
#include <optional>
4243

4344
using namespace mlir;
4445
using namespace mlir::NVVM;
@@ -220,6 +221,16 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
220221
std::optional<SmallVector<char, 0>>
221222
moduleToObject(llvm::Module &llvmModule) override;
222223

224+
/// Get LLVMIR->ISA performance result.
225+
/// Return nullopt if moduleToObject has not been called or the target format
226+
/// is LLVMIR.
227+
std::optional<int64_t> getLLVMIRToISAPerfResult();
228+
229+
/// Get ISA->Binary performance result.
230+
/// Return nullopt if moduleToObject has not been called or the target format
231+
/// is LLVMIR or ISA.
232+
std::optional<int64_t> getISAToBinaryPerfResult();
233+
223234
private:
224235
using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
225236

@@ -235,13 +246,20 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
235246

236247
/// Target options.
237248
gpu::TargetOptions targetOptions;
249+
250+
/// LLVMIR->ISA perf result.
251+
std::optional<int64_t> llvmToISAPerfResult;
252+
253+
/// ISA->Binary perf result.
254+
std::optional<int64_t> isaToBinaryPerfResult;
238255
};
239256
} // namespace
240257

241258
/// Construct the serializer: forwards `module`, `target` and `targetOptions`
/// to SerializeGPUModuleBase, stores the options in the `targetOptions`
/// member, and starts both per-stage timing results as std::nullopt (no
/// stage has run yet).
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
242259
const gpu::TargetOptions &targetOptions)
243260
: SerializeGPUModuleBase(module, target, targetOptions),
244-
targetOptions(targetOptions) {}
261+
targetOptions(targetOptions), llvmToISAPerfResult(std::nullopt),
262+
isaToBinaryPerfResult(std::nullopt) {}
245263

246264
std::optional<NVPTXSerializer::TmpFile>
247265
NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
@@ -256,6 +274,14 @@ NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
256274
return TmpFile(filename, llvm::FileRemover(filename.c_str()));
257275
}
258276

277+
/// Accessor for the LLVMIR->ISA timing stored by moduleToObject (a
/// millisecond count). Holds std::nullopt until moduleToObject has assigned
/// it.
std::optional<int64_t> NVPTXSerializer::getLLVMIRToISAPerfResult() {
278+
return llvmToISAPerfResult;
279+
}
280+
281+
/// Accessor for the ISA->Binary timing stored by moduleToObject (a
/// millisecond count). Holds std::nullopt until moduleToObject has assigned
/// it, which only happens after the binary compilation step.
std::optional<int64_t> NVPTXSerializer::getISAToBinaryPerfResult() {
282+
return isaToBinaryPerfResult;
283+
}
284+
259285
/// Returns the operation held by SerializeGPUModuleBase viewed as a
/// gpu::GPUModuleOp. The conversion uses dyn_cast, so the result is
/// presumably null when the held operation is not a GPU module — confirm
/// against callers.
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
260286
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
261287
}
@@ -618,6 +644,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
618644

619645
std::optional<SmallVector<char, 0>>
620646
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
647+
std::chrono::high_resolution_clock::time_point llvmPoint =
648+
std::chrono::high_resolution_clock::now();
621649
// Return LLVM IR if the compilation target is `offload`.
622650
#define DEBUG_TYPE "serialize-to-llvm"
623651
LLVM_DEBUG({
@@ -650,6 +678,11 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
650678
getOperation().emitError() << "Failed translating the module to ISA.";
651679
return std::nullopt;
652680
}
681+
std::chrono::high_resolution_clock::time_point ptxPoint =
682+
std::chrono::high_resolution_clock::now();
683+
llvmToISAPerfResult = std::chrono::duration_cast<std::chrono::milliseconds>(
684+
ptxPoint - llvmPoint)
685+
.count();
653686
if (isaCallback)
654687
isaCallback(serializedISA.value());
655688

@@ -669,17 +702,26 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
669702
return SmallVector<char, 0>(bin.begin(), bin.end());
670703
}
671704

705+
std::optional<SmallVector<char, 0>> result;
672706
// Compile to binary.
673707
#if MLIR_ENABLE_NVPTXCOMPILER
674-
return compileToBinaryNVPTX(*serializedISA);
708+
result = compileToBinaryNVPTX(*serializedISA);
675709
#else
676-
return compileToBinary(*serializedISA);
710+
result = compileToBinary(*serializedISA);
677711
#endif // MLIR_ENABLE_NVPTXCOMPILER
712+
713+
std::chrono::high_resolution_clock::time_point binaryPoint =
714+
std::chrono::high_resolution_clock::now();
715+
isaToBinaryPerfResult = std::chrono::duration_cast<std::chrono::milliseconds>(
716+
binaryPoint - ptxPoint)
717+
.count();
718+
return result;
678719
}
679720

680721
/// Serialize `module` for the NVVM target `attribute` using `options`.
///
/// Runs an NVPTXSerializer over the module and, for every compilation stage
/// that reported a timing, records it on `module` as an i64 attribute:
///   - "LLVMIRToPTXTimeCost": LLVMIR->ISA time (milliseconds).
///   - "PTXToBinaryTimeCost": ISA->Binary time (milliseconds).
/// A stage that did not run leaves its attribute unset.
///
/// Returns whatever the serializer produced, or std::nullopt when `module`
/// is null.
std::optional<SmallVector<char, 0>>
681722
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
682723
const gpu::TargetOptions &options) const {
724+
Builder builder(attribute.getContext());
683725
assert(module && "The module must be non null.");
684726
if (!module)
685727
return std::nullopt;
@@ -689,7 +731,16 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
689731
}
690732
NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
691733
serializer.init();
692-
return serializer.run();
734+
std::optional<SmallVector<char, 0>> result = serializer.run();
735+
auto llvmToISAPerfResult = serializer.getLLVMIRToISAPerfResult();
736+
if (llvmToISAPerfResult.has_value())
737+
module->setAttr("LLVMIRToPTXTimeCost",
738+
builder.getI64IntegerAttr(*llvmToISAPerfResult));
739+
// Query the ISA->Binary getter here. Asking for the LLVMIR->ISA result a
// second time would publish the wrong stage's time under
// "PTXToBinaryTimeCost" and set the attribute even when no binary was built.
auto isaToBinaryPerfResult = serializer.getISAToBinaryPerfResult();
740+
if (isaToBinaryPerfResult.has_value())
741+
module->setAttr("PTXToBinaryTimeCost",
742+
builder.getI64IntegerAttr(*isaToBinaryPerfResult));
743+
return result;
693744
}
694745

695746
Attribute
@@ -700,7 +751,7 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
700751
gpu::CompilationTarget format = options.getCompilationTarget();
701752
DictionaryAttr objectProps;
702753
Builder builder(attribute.getContext());
703-
SmallVector<NamedAttribute, 2> properties;
754+
SmallVector<NamedAttribute, 4> properties;
704755
if (format == gpu::CompilationTarget::Assembly)
705756
properties.push_back(
706757
builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
@@ -709,6 +760,14 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
709760
properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
710761
builder.getStringAttr(section)));
711762

763+
for (const auto *perfName : {"LLVMIRToPTXTimeCost", "PTXToBinaryTimeCost"}) {
764+
if (module->hasAttr(perfName)) {
765+
IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
766+
properties.push_back(builder.getNamedAttr(
767+
perfName, builder.getI64IntegerAttr(attr.getInt())));
768+
}
769+
}
770+
712771
if (!properties.empty())
713772
objectProps = builder.getDictionaryAttr(properties);
714773

mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ module attributes {gpu.container_module} {
1616
}
1717

1818
// CHECK-LABEL:gpu.binary @kernel_module2
19-
// CHECK-ISA:[#gpu.object<#nvvm.target<flags = {fast}>, properties = {O = 2 : i32}, assembly = "{{.*}}">, #gpu.object<#nvvm.target, properties = {O = 2 : i32}, assembly = "{{.*}}">]
19+
// CHECK-ISA:[#gpu.object<#nvvm.target<flags = {fast}>, properties = {O = 2 : i32, LLVMIRToPTXTimeCost = "{{[0-9]+}}" : i64}, assembly = "{{.*}}">, #gpu.object<#nvvm.target, properties = {O = 2 : i32}, assembly = "{{.*}}">]
2020
gpu.module @kernel_module2 [#nvvm.target<flags = {fast}>, #nvvm.target] {
2121
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
2222
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,

mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,35 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
296296
ASSERT_TRUE(!object->empty());
297297
}
298298
}
299+
300+
// Test that per-stage performance results are injected into the module and
// forwarded into the object properties. The compilation target is Assembly,
// so only the LLVMIR->ISA stage is expected to report a timing.
301+
TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(Stage)) {
302+
MLIRContext context(registry);
303+

304+
OwningOpRef<ModuleOp> module =
305+
parseSourceString<ModuleOp>(moduleStr, &context);
306+
ASSERT_TRUE(!!module);
307+

308+
NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
309+

310+
auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
311+
ASSERT_TRUE(!!serializer);
312+

313+
gpu::TargetOptions options({}, {}, {}, {}, gpu::CompilationTarget::Assembly);
314+

315+
for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
316+
std::optional<SmallVector<char, 0>> object =
317+
serializer.serializeToObject(gpuModule, options);
318+
ASSERT_TRUE(object != std::nullopt);
319+
ASSERT_TRUE(!object->empty());
320+
ASSERT_TRUE(gpuModule->hasAttr("LLVMIRToPTXTimeCost"));
321+
// "PTXToBinaryTimeCost" is deliberately not required here: the serializer
// documents the ISA->Binary result as nullopt when the target format is ISA,
// and this test compiles to Assembly.
322+

323+
Attribute attr = serializer.createObject(gpuModule, *object, options);
324+
ASSERT_TRUE(!!attr);
325+
auto objectAttr = cast<gpu::ObjectAttr>(attr);
326+
auto props = objectAttr.getProperties();
327+
ASSERT_TRUE(!!props.get("LLVMIRToPTXTimeCost"));
328+
// No "PTXToBinaryTimeCost" property is required for an Assembly object; a
// binary-target test would be the place to cover that stage.
329+
}
330+
}

0 commit comments

Comments
 (0)