39
39
40
40
#include < cstdint>
41
41
#include < cstdlib>
42
+ #include < optional>
42
43
43
44
using namespace mlir ;
44
45
using namespace mlir ::NVVM;
@@ -220,6 +221,16 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
220
221
std::optional<SmallVector<char , 0 >>
221
222
moduleToObject (llvm::Module &llvmModule) override ;
222
223
224
+ /// Get LLVMIR->ISA performance result (elapsed time, in milliseconds).
225
+ /// Return nullopt if moduleToObject has not been called, if translating the
226
+ /// module to ISA failed, or if the target format is LLVMIR.
227
+ std::optional<int64_t > getLLVMIRToISAPerfResult ();
228
+
229
+ /// Get ISA->Binary performance result (elapsed time, in milliseconds).
230
+ /// Return nullopt if moduleToObject has not been called or the target format
231
+ /// is LLVMIR or ISA.
232
+ std::optional<int64_t > getISAToBinaryPerfResult ();
233
+
223
234
private:
224
235
using TmpFile = std::pair<llvm::SmallString<128 >, llvm::FileRemover>;
225
236
@@ -235,13 +246,20 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
235
246
236
247
// / Target options.
237
248
gpu::TargetOptions targetOptions;
249
+
250
+ /// LLVMIR->ISA perf result: elapsed milliseconds, empty until moduleToObject records it.
251
+ std::optional<int64_t > llvmToISAPerfResult;
252
+
253
+ /// ISA->Binary perf result: elapsed milliseconds, empty until moduleToObject records it.
254
+ std::optional<int64_t > isaToBinaryPerfResult;
238
255
};
239
256
} // namespace
240
257
241
258
// Forwards the module, target and options to SerializeGPUModuleBase, keeps its
// own copy of the options, and starts with both perf results empty.
NVPTXSerializer::NVPTXSerializer (Operation &module , NVVMTargetAttr target,
242
259
const gpu::TargetOptions &targetOptions)
243
260
: SerializeGPUModuleBase(module , target, targetOptions),
244
- targetOptions(targetOptions) {}
261
+ targetOptions(targetOptions), llvmToISAPerfResult(std::nullopt),
262
+ isaToBinaryPerfResult(std::nullopt) {}
245
263
246
264
std::optional<NVPTXSerializer::TmpFile>
247
265
NVPTXSerializer::createTemp (StringRef name, StringRef suffix) {
@@ -256,6 +274,14 @@ NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
256
274
return TmpFile (filename, llvm::FileRemover (filename.c_str ()));
257
275
}
258
276
277
// Accessor for the LLVMIR->ISA timing stored by moduleToObject; returns the
// member as-is, so it is empty until that timing has been recorded.
+ std::optional<int64_t > NVPTXSerializer::getLLVMIRToISAPerfResult () {
278
+ return llvmToISAPerfResult;
279
+ }
280
+
281
// Accessor for the ISA->Binary timing stored by moduleToObject; returns the
// member as-is, so it is empty until that timing has been recorded.
+ std::optional<int64_t > NVPTXSerializer::getISAToBinaryPerfResult () {
282
+ return isaToBinaryPerfResult;
283
+ }
284
+
259
285
// Returns the base serializer's operation viewed as a gpu::GPUModuleOp.
// NOTE(review): dyn_cast presumably yields a null op when the wrapped operation
// is not a GPUModuleOp — confirm callers tolerate that.
gpu::GPUModuleOp NVPTXSerializer::getOperation () {
260
286
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation ());
261
287
}
@@ -618,6 +644,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
618
644
619
645
std::optional<SmallVector<char , 0 >>
620
646
NVPTXSerializer::moduleToObject (llvm::Module &llvmModule) {
647
// Timestamp taken on entry; the LLVMIR->ISA duration is measured from here.
// NOTE(review): high_resolution_clock is not guaranteed to be monotonic;
// steady_clock is the usual choice for interval timing. Every time point in
// this function would have to switch clocks together — confirm before changing.
// NOTE(review): no `#include <chrono>` is visible in this view — confirm it is
// included directly rather than picked up transitively.
+ std::chrono::high_resolution_clock::time_point llvmPoint =
648
+ std::chrono::high_resolution_clock::now ();
621
649
// Return LLVM IR if the compilation target is `offload`.
622
650
#define DEBUG_TYPE " serialize-to-llvm"
623
651
LLVM_DEBUG ({
@@ -650,6 +678,11 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
650
678
getOperation ().emitError () << " Failed translating the module to ISA." ;
651
679
return std::nullopt;
652
680
}
681
// Reached only after the ISA-translation failure path above has not returned:
// store the time elapsed since llvmPoint, truncated to whole milliseconds.
+ std::chrono::high_resolution_clock::time_point ptxPoint =
682
+ std::chrono::high_resolution_clock::now ();
683
+ llvmToISAPerfResult = std::chrono::duration_cast<std::chrono::milliseconds>(
684
+ ptxPoint - llvmPoint)
685
+ .count ();
653
686
if (isaCallback)
654
687
isaCallback (serializedISA.value ());
655
688
@@ -669,17 +702,26 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
669
702
return SmallVector<char , 0 >(bin.begin (), bin.end ());
670
703
}
671
704
705
// Holds the compiled binary so the ISA->Binary timing can be recorded before
// returning, instead of returning straight from the compile call.
+ std::optional<SmallVector<char , 0 >> result;
672
706
// Compile to binary, using whichever backend the build configuration selects.
673
707
#if MLIR_ENABLE_NVPTXCOMPILER
674
- return compileToBinaryNVPTX (*serializedISA);
708
+ result = compileToBinaryNVPTX (*serializedISA);
675
709
#else
676
- return compileToBinary (*serializedISA);
710
+ result = compileToBinary (*serializedISA);
677
711
#endif // MLIR_ENABLE_NVPTXCOMPILER
712
+
713
// The interval starts at ptxPoint, so it also covers whatever ran between that
// timestamp and the compile call (e.g. the isaCallback invocation). It is
// recorded unconditionally, i.e. even when `result` is empty.
+ std::chrono::high_resolution_clock::time_point binaryPoint =
714
+ std::chrono::high_resolution_clock::now ();
715
+ isaToBinaryPerfResult = std::chrono::duration_cast<std::chrono::milliseconds>(
716
+ binaryPoint - ptxPoint)
717
+ .count ();
718
+ return result;
678
719
}
679
720
680
721
std::optional<SmallVector<char , 0 >>
681
722
NVVMTargetAttrImpl::serializeToObject (Attribute attribute, Operation *module ,
682
723
const gpu::TargetOptions &options) const {
724
+ Builder builder (attribute.getContext ());
683
725
assert (module && " The module must be non null." );
684
726
if (!module )
685
727
return std::nullopt;
@@ -689,7 +731,16 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
689
731
}
690
732
NVPTXSerializer serializer (*module , cast<NVVMTargetAttr>(attribute), options);
691
733
serializer.init ();
692
- return serializer.run ();
734
+ std::optional<SmallVector<char , 0 >> result = serializer.run ();
+ // Publish each stage's timing (milliseconds) as an i64 attribute on the
+ // module; a stage that did not run leaves its optional empty and sets nothing.
735
+ auto llvmToISAPerfResult = serializer.getLLVMIRToISAPerfResult ();
736
+ if (llvmToISAPerfResult.has_value ())
737
+ module ->setAttr (" LLVMIRToPTXTimeCost" ,
738
+ builder.getI64IntegerAttr (*llvmToISAPerfResult));
739
+ // Must read the ISA->Binary getter here: reusing the LLVMIR->ISA getter would
+ // report the wrong stage's time and set the attribute even without a binary.
+ auto isaToBinaryPerfResult = serializer.getISAToBinaryPerfResult ();
740
+ if (isaToBinaryPerfResult.has_value ())
741
+ module ->setAttr (" PTXToBinaryTimeCost" ,
742
+ builder.getI64IntegerAttr (*isaToBinaryPerfResult));
743
+ return result;
693
744
}
694
745
695
746
Attribute
@@ -700,7 +751,7 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
700
751
gpu::CompilationTarget format = options.getCompilationTarget ();
701
752
DictionaryAttr objectProps;
702
753
Builder builder (attribute.getContext ());
703
- SmallVector<NamedAttribute, 2 > properties;
754
+ SmallVector<NamedAttribute, 4 > properties;
704
755
if (format == gpu::CompilationTarget::Assembly)
705
756
properties.push_back (
706
757
builder.getNamedAttr (" O" , builder.getI32IntegerAttr (target.getO ())));
@@ -709,6 +760,14 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
709
760
properties.push_back (builder.getNamedAttr (gpu::elfSectionName,
710
761
builder.getStringAttr (section)));
711
762
763
+ for (const auto *perfName : {" LLVMIRToPTXTimeCost" , " PTXToBinaryTimeCost" }) {
764
+ // getAttrOfType is null when the attribute is absent OR not an IntegerAttr, so a mistyped attribute is skipped instead of dereferenced.
765
+ if (IntegerAttr attr = module ->getAttrOfType<IntegerAttr>(perfName)) {
766
+ properties.push_back (builder.getNamedAttr (
767
+ perfName, builder.getI64IntegerAttr (attr.getInt ())));
768
+ }
769
+ }
770
+
712
771
if (!properties.empty ())
713
772
objectProps = builder.getDictionaryAttr (properties);
714
773
0 commit comments