Skip to content

Commit 9ff990f

Browse files
committed
Fixed cuda flag
1 parent 1f4948f commit 9ff990f

File tree

3 files changed

+37
-71
lines changed

3 files changed

+37
-71
lines changed

clang/include/clang/Interpreter/Interpreter.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ class Interpreter {
9595
// An optional parser for CUDA offloading
9696
std::unique_ptr<IncrementalParser> DeviceParser;
9797

98+
// An optional action for CUDA offloading
99+
std::unique_ptr<IncrementalAction> DeviceAct;
100+
98101
/// List containing information about each incrementally parsed piece of code.
99102
std::list<PartialTranslationUnit> PTUs;
100103

@@ -176,10 +179,11 @@ class Interpreter {
176179
llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
177180
llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
178181

179-
CodeGenerator *getCodeGen() const;
180-
std::unique_ptr<llvm::Module> GenModule();
182+
CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
183+
std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
181184
PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
182-
std::unique_ptr<llvm::Module> M = {});
185+
std::unique_ptr<llvm::Module> M = {},
186+
IncrementalAction *Action = nullptr);
183187

184188
// A cache for the compiled destructors used for de-allocation of managed
185189
// clang::Values.

clang/lib/Interpreter/DeviceOffload.cpp

Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -51,47 +51,16 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
5151
if (!PTU)
5252
return PTU.takeError();
5353

54-
// auto PTX = GeneratePTX();
55-
// if (!PTX)
56-
// return PTX.takeError();
57-
58-
// auto Err = GenerateFatbinary();
59-
// if (Err)
60-
// return std::move(Err);
61-
62-
// std::string FatbinFileName =
63-
// "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
64-
// VFS->addFile(FatbinFileName, 0,
65-
// llvm::MemoryBuffer::getMemBuffer(
66-
// llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
67-
// "", false));
68-
69-
// CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
70-
71-
// FatbinContent.clear();
72-
7354
return PTU;
7455
}
7556

7657
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
77-
llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
78-
assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
7958
auto &PTU = PTUs.back();
8059
std::string Error;
8160

82-
if (!PTU.TheModule) {
83-
llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
84-
} else {
85-
llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
86-
}
87-
88-
llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
89-
llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
90-
9161
const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
9262
PTU.TheModule->getTargetTriple(), Error);
9363
if (!Target) {
94-
llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
9564
return llvm::make_error<llvm::StringError>(std::move(Error),
9665
std::error_code());
9766
}
@@ -187,8 +156,9 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
187156

188157
FatbinContent.append(PTXCode.begin(), PTXCode.end());
189158

190-
std::string FatbinFileName =
191-
"/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
159+
auto &PTU = PTUs.back();
160+
161+
std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
192162

193163
VFS->addFile(FatbinFileName, 0,
194164
llvm::MemoryBuffer::getMemBuffer(
@@ -202,18 +172,6 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
202172
return llvm::Error::success();
203173
}
204174

205-
// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
206-
// std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
207-
208-
// VFS->addFile(FatbinFileName, 0,
209-
// llvm::MemoryBuffer::getMemBuffer(
210-
// llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
211-
// "", false));
212-
213-
// CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
214-
// FatbinContent.clear();
215-
// }
216-
217175
IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
218176

219177
} // namespace clang

clang/lib/Interpreter/Interpreter.cpp

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -459,14 +459,28 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
459459
if (Err)
460460
return std::move(Err);
461461

462+
CompilerInstance &HostCI = *(Interp->getCompilerInstance());
463+
462464
if (DeviceCI) {
463-
DeviceCI->ExecuteAction(*Interp->Act);
465+
Interp->DeviceAct = std::make_unique<IncrementalAction>(
466+
*DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp);
467+
468+
if (Err)
469+
return std::move(Err);
470+
471+
DeviceCI->ExecuteAction(*Interp->DeviceAct);
464472

473+
// avoid writing fat binary to disk using an in-memory virtual file system
465474
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
466475
std::make_unique<llvm::vfs::InMemoryFileSystem>();
476+
llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
477+
std::make_unique<llvm::vfs::OverlayFileSystem>(
478+
llvm::vfs::getRealFileSystem());
479+
OverlayVFS->pushOverlay(IMVFS);
480+
HostCI.createFileManager(OverlayVFS);
467481

468482
auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
469-
std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
483+
std::move(DeviceCI), HostCI, IMVFS, Err,
470484
Interp->PTUs);
471485

472486
if (Err)
@@ -489,15 +503,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
489503
llvm::Expected<std::unique_ptr<Interpreter>>
490504
Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
491505
std::unique_ptr<CompilerInstance> DCI) {
492-
// avoid writing fat binary to disk using an in-memory virtual file system
493-
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
494-
std::make_unique<llvm::vfs::InMemoryFileSystem>();
495-
llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
496-
std::make_unique<llvm::vfs::OverlayFileSystem>(
497-
llvm::vfs::getRealFileSystem());
498-
OverlayVFS->pushOverlay(IMVFS);
499-
CI->createFileManager(OverlayVFS);
500-
501506
return Interpreter::create(std::move(CI), std::move(DCI));
502507
}
503508

@@ -536,15 +541,16 @@ size_t Interpreter::getEffectivePTUSize() const {
536541

537542
PartialTranslationUnit &
538543
Interpreter::RegisterPTU(TranslationUnitDecl *TU,
539-
std::unique_ptr<llvm::Module> M /*={}*/) {
544+
std::unique_ptr<llvm::Module> M /*={}*/,
545+
IncrementalAction *Action) {
540546
PTUs.emplace_back(PartialTranslationUnit());
541547
PartialTranslationUnit &LastPTU = PTUs.back();
542548
LastPTU.TUPart = TU;
543549

544550
if (!M)
545-
M = GenModule();
551+
M = GenModule(Action);
546552

547-
assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
553+
assert((!getCodeGen(Action) || M) && "Must have a llvm::Module at this point");
548554

549555
LastPTU.TheModule = std::move(M);
550556
LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -561,18 +567,14 @@ Interpreter::Parse(llvm::StringRef Code) {
561567
// If we have a device parser, parse it first. The generated code will be
562568
// included in the host compilation
563569
if (DeviceParser) {
564-
llvm::errs() << "[CUDA] Parsing device code...\n";
565570
llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
566571
if (auto E = DeviceTU.takeError()) {
567-
llvm::errs() << "[CUDA] Device Parse failed!\n";
568572
return std::move(E);
569573
}
570-
llvm::errs() << "[CUDA] Device parse successful.\n";
571574

572575
auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
573-
llvm::errs() << "[CUDA] Registering device PTU...\n";
574576

575-
PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
577+
PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
576578

577579
llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
578580
if (!PTX)
@@ -757,9 +759,9 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) {
757759
return llvm::Error::success();
758760
}
759761

760-
std::unique_ptr<llvm::Module> Interpreter::GenModule() {
762+
std::unique_ptr<llvm::Module> Interpreter::GenModule(IncrementalAction *Action) {
761763
static unsigned ID = 0;
762-
if (CodeGenerator *CG = getCodeGen()) {
764+
if (CodeGenerator *CG = getCodeGen(Action)) {
763765
// Clang's CodeGen is designed to work with a single llvm::Module. In many
764766
// cases for convenience various CodeGen parts have a reference to the
765767
// llvm::Module (TheModule or Module) which does not change when a new
@@ -781,8 +783,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
781783
return nullptr;
782784
}
783785

784-
CodeGenerator *Interpreter::getCodeGen() const {
785-
FrontendAction *WrappedAct = Act->getWrapped();
786+
CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
787+
if (!Action)
788+
Action = Act.get();
789+
FrontendAction *WrappedAct = Action->getWrapped();
786790
if (!WrappedAct->hasIRSupport())
787791
return nullptr;
788792
return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();

0 commit comments

Comments
 (0)