Commit ddeab07

[clang-repl][CUDA] Re-land: Initial interactive CUDA support for clang-repl
CUDA support can be enabled in clang-repl with the --cuda flag. Device code linking is not yet supported, and inline must be used with all __device__ functions.

Differential Revision: https://reviews.llvm.org/D146389
1 parent fe01c08

22 files changed: +591 -27 lines
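
As a quick orientation before the per-file diffs, a minimal session with the new flag might look like the following. This is illustrative only: the kernel and variable names are invented, and the availability of runtime calls such as cudaMalloc depends on the local CUDA SDK; note the required inline on the __device__ function.

    $ clang-repl --cuda
    clang-repl> extern "C" int printf(const char *, ...);
    clang-repl> inline __device__ int twice(int x) { return 2 * x; }
    clang-repl> __global__ void kernel(int *out) { *out = twice(21); }
    clang-repl> int *dp = nullptr; cudaMalloc((void **)&dp, sizeof(int));
    clang-repl> kernel<<<1, 1>>>(dp);
    clang-repl> int res = 0; cudaMemcpy(&res, dp, sizeof(int), cudaMemcpyDeviceToHost);
    clang-repl> printf("result: %d\n", res);
    result: 42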

clang/include/clang/Interpreter/Interpreter.h

Lines changed: 32 additions & 0 deletions

@@ -42,8 +42,34 @@ class IncrementalParser;
 /// Create a pre-configured \c CompilerInstance for incremental processing.
 class IncrementalCompilerBuilder {
 public:
+  IncrementalCompilerBuilder() {}
+
+  void SetCompilerArgs(const std::vector<const char *> &Args) {
+    UserArgs = Args;
+  }
+
+  // General C++
+  llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCpp();
+
+  // Offload options
+  void SetOffloadArch(llvm::StringRef Arch) { OffloadArch = Arch; };
+
+  // CUDA specific
+  void SetCudaSDK(llvm::StringRef path) { CudaSDKPath = path; };
+
+  llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCudaHost();
+  llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCudaDevice();
+
+private:
   static llvm::Expected<std::unique_ptr<CompilerInstance>>
   create(std::vector<const char *> &ClangArgv);
+
+  llvm::Expected<std::unique_ptr<CompilerInstance>> createCuda(bool device);
+
+  std::vector<const char *> UserArgs;
+
+  llvm::StringRef OffloadArch;
+  llvm::StringRef CudaSDKPath;
 };

 /// Provides top-level interfaces for incremental compilation and execution.
@@ -52,6 +78,9 @@ class Interpreter {
   std::unique_ptr<IncrementalParser> IncrParser;
   std::unique_ptr<IncrementalExecutor> IncrExecutor;

+  // An optional parser for CUDA offloading
+  std::unique_ptr<IncrementalParser> DeviceParser;
+
   Interpreter(std::unique_ptr<CompilerInstance> CI, llvm::Error &Err);

   llvm::Error CreateExecutor();
@@ -66,6 +95,9 @@ class Interpreter {
   ~Interpreter();
   static llvm::Expected<std::unique_ptr<Interpreter>>
   create(std::unique_ptr<CompilerInstance> CI);
+  static llvm::Expected<std::unique_ptr<Interpreter>>
+  createWithCUDA(std::unique_ptr<CompilerInstance> CI,
+                 std::unique_ptr<CompilerInstance> DCI);
   const ASTContext &getASTContext() const;
   ASTContext &getASTContext();
   const CompilerInstance *getCompilerInstance() const;
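
Taken together, the new declarations sketch the embedding API. The following is a sketch based only on the signatures above; the architecture string, SDK path, function name, and error handling are illustrative, not part of the commit.

    #include "clang/Interpreter/Interpreter.h"

    llvm::Expected<std::unique_ptr<clang::Interpreter>> makeCudaInterpreter() {
      clang::IncrementalCompilerBuilder CB;
      CB.SetOffloadArch("sm_70");       // assumption: a Volta-class GPU
      CB.SetCudaSDK("/usr/local/cuda"); // assumption: default SDK location

      // Create both instances; Interpreter::createWithCUDA wires them together.
      auto DeviceCI = CB.CreateCudaDevice();
      if (!DeviceCI)
        return DeviceCI.takeError();
      auto HostCI = CB.CreateCudaHost();
      if (!HostCI)
        return HostCI.takeError();

      return clang::Interpreter::createWithCUDA(std::move(*HostCI),
                                                std::move(*DeviceCI));
    }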

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 4 additions & 2 deletions

@@ -24,6 +24,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/VirtualFileSystem.h"

 using namespace clang;
 using namespace CodeGen;
@@ -721,8 +722,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   // handle so CUDA runtime can figure out what to call on the GPU side.
   std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
   if (!CudaGpuBinaryFileName.empty()) {
-    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
-        llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
+    auto VFS = CGM.getFileSystem();
+    auto CudaGpuBinaryOrErr =
+        VFS->getBufferForFile(CudaGpuBinaryFileName, -1, false);
     if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
       CGM.getDiags().Report(diag::err_cannot_open_file)
           << CudaGpuBinaryFileName << EC.message();
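
The motivation for reading the GPU binary through the virtual file system instead of MemoryBuffer::getFileOrSTDIN is that the interpreter can then hand codegen a fatbin that exists only in memory. A standalone sketch of the mechanism, with placeholder file name and contents:

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"

    void demo() {
      auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
      // Publish an in-memory "file", as the device parser does for each fatbin.
      FS->addFile("/incr_module_0.fatbin", /*ModificationTime=*/0,
                  llvm::MemoryBuffer::getMemBuffer("<fatbin bytes>"));
      // The rewritten path above can now open it without any on-disk file; the
      // arguments mirror the call in makeModuleCtorFunction: file-size hint -1,
      // RequiresNullTerminator = false.
      auto BufOrErr = FS->getBufferForFile("/incr_module_0.fatbin", -1, false);
      (void)BufOrErr;
    }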

clang/lib/CodeGen/CodeGenAction.cpp

Lines changed: 2 additions & 0 deletions

@@ -264,6 +264,7 @@ namespace clang {
   // Links each entry in LinkModules into our module. Returns true on error.
   bool LinkInModules() {
     for (auto &LM : LinkModules) {
+      assert(LM.Module && "LinkModule does not actually have a module");
       if (LM.PropagateAttrs)
         for (Function &F : *LM.Module) {
           // Skip intrinsics. Keep consistent with how intrinsics are created
@@ -293,6 +294,7 @@ namespace clang {
       if (Err)
         return true;
     }
+    LinkModules.clear();
     return false; // success
   }

With incremental compilation, LinkInModules can now run once per REPL input on the same consumer; clearing LinkModules after a successful pass keeps later calls from re-linking modules that were already consumed.

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 4 additions & 0 deletions

@@ -6272,6 +6272,10 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) {
 }

 void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) {
+  // Device code should not be at top level.
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice)
+    return;
+
   std::unique_ptr<CodeGenFunction> &CurCGF =
       GlobalTopLevelStmtBlockInFlight.first;

clang/lib/CodeGen/ModuleBuilder.cpp

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ namespace {
   IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; // Only used for debug info.
   const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info.
   const PreprocessorOptions &PreprocessorOpts; // Only used for debug info.
-  const CodeGenOptions CodeGenOpts; // Intentionally copied in.
+  const CodeGenOptions &CodeGenOpts;

   unsigned HandlingTopLevelDecls;

The options were previously copied at construction time; the device parser now rewrites CodeGenOptions::CudaGpuBinaryFileName after every device PTU (see DeviceOffload.cpp below), so the code generator must hold a reference in order to observe those updates.

clang/lib/Interpreter/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   core
   native
+  MC
   Option
   OrcJit
   OrcShared
@@ -11,6 +12,7 @@ set(LLVM_LINK_COMPONENTS
   )

 add_clang_library(clangInterpreter
+  DeviceOffload.cpp
   IncrementalExecutor.cpp
   IncrementalParser.cpp
   Interpreter.cpp

clang/lib/Interpreter/DeviceOffload.cpp

Lines changed: 176 additions & 0 deletions

@@ -0,0 +1,176 @@
+//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements offloading to CUDA devices.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceOffload.h"
+
+#include "clang/Basic/TargetOptions.h"
+#include "clang/CodeGen/ModuleBuilder.h"
+#include "clang/Frontend/CompilerInstance.h"
+
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace clang {
+
+IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
+    Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
+    IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
+    llvm::Error &Err)
+    : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
+      HostParser(HostParser), VFS(FS) {
+  if (Err)
+    return;
+  StringRef Arch = CI->getTargetOpts().CPU;
+  if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+    Err = llvm::joinErrors(std::move(Err),
+                           llvm::make_error<llvm::StringError>(
+                               "Invalid CUDA architecture",
+                               llvm::inconvertibleErrorCode()));
+    return;
+  }
+}
+
+llvm::Expected<PartialTranslationUnit &>
+IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
+  auto PTU = IncrementalParser::Parse(Input);
+  if (!PTU)
+    return PTU.takeError();
+
+  auto PTX = GeneratePTX();
+  if (!PTX)
+    return PTX.takeError();
+
+  auto Err = GenerateFatbinary();
+  if (Err)
+    return std::move(Err);
+
+  std::string FatbinFileName =
+      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+  VFS->addFile(FatbinFileName, 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+                   "", false));
+
+  HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;
+
+  FatbinContent.clear();
+
+  return PTU;
+}
+
+llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
+  auto &PTU = PTUs.back();
+  std::string Error;
+
+  const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
+      PTU.TheModule->getTargetTriple(), Error);
+  if (!Target)
+    return llvm::make_error<llvm::StringError>(std::move(Error),
+                                               std::error_code());
+  llvm::TargetOptions TO = llvm::TargetOptions();
+  llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
+      PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
+      llvm::Reloc::Model::PIC_);
+  PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
+
+  PTXCode.clear();
+  llvm::raw_svector_ostream dest(PTXCode);
+
+  llvm::legacy::PassManager PM;
+  if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
+                                         llvm::CGFT_AssemblyFile)) {
+    return llvm::make_error<llvm::StringError>(
+        "NVPTX backend cannot produce PTX code.",
+        llvm::inconvertibleErrorCode());
+  }
+
+  if (!PM.run(*PTU.TheModule))
+    return llvm::make_error<llvm::StringError>(
+        "Failed to emit PTX code.", llvm::inconvertibleErrorCode());
+
+  PTXCode += '\0';
+  while (PTXCode.size() % 8)
+    PTXCode += '\0';
+  return PTXCode.str();
+}
+
+llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
+  enum FatBinFlags {
+    AddressSize64 = 0x01,
+    HasDebugInfo = 0x02,
+    ProducerCuda = 0x04,
+    HostLinux = 0x10,
+    HostMac = 0x20,
+    HostWindows = 0x40
+  };
+
+  struct FatBinInnerHeader {
+    uint16_t Kind;             // 0x00
+    uint16_t unknown02;        // 0x02
+    uint32_t HeaderSize;       // 0x04
+    uint32_t DataSize;         // 0x08
+    uint32_t unknown0c;        // 0x0c
+    uint32_t CompressedSize;   // 0x10
+    uint32_t SubHeaderSize;    // 0x14
+    uint16_t VersionMinor;     // 0x18
+    uint16_t VersionMajor;     // 0x1a
+    uint32_t CudaArch;         // 0x1c
+    uint32_t unknown20;        // 0x20
+    uint32_t unknown24;        // 0x24
+    uint32_t Flags;            // 0x28
+    uint32_t unknown2c;        // 0x2c
+    uint32_t unknown30;        // 0x30
+    uint32_t unknown34;        // 0x34
+    uint32_t UncompressedSize; // 0x38
+    uint32_t unknown3c;        // 0x3c
+    uint32_t unknown40;        // 0x40
+    uint32_t unknown44;        // 0x44
+    FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
+        : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
+          DataSize(DataSize), unknown0c(0), CompressedSize(0),
+          SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
+          CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
+          unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
+          unknown3c(0), unknown40(0), unknown44(0) {}
+  };
+
+  struct FatBinHeader {
+    uint32_t Magic;      // 0x00
+    uint16_t Version;    // 0x04
+    uint16_t HeaderSize; // 0x06
+    uint32_t DataSize;   // 0x08
+    uint32_t unknown0c;  // 0x0c
+  public:
+    FatBinHeader(uint32_t DataSize)
+        : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
+          DataSize(DataSize), unknown0c(0) {}
+  };
+
+  FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
+  FatbinContent.append((char *)&OuterHeader,
+                       ((char *)&OuterHeader) + OuterHeader.HeaderSize);
+
+  FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
+                                FatBinFlags::AddressSize64 |
+                                    FatBinFlags::HostLinux);
+  FatbinContent.append((char *)&InnerHeader,
+                       ((char *)&InnerHeader) + InnerHeader.HeaderSize);
+
+  FatbinContent.append(PTXCode.begin(), PTXCode.end());
+
+  return llvm::Error::success();
+}
+
+IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
+
+} // namespace clang
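
One detail of GeneratePTX worth highlighting: the PTX string is NUL-terminated and then padded to an 8-byte boundary before GenerateFatbinary wraps it in the outer and inner headers. Isolated as a plain function (a restatement for clarity, not code from the commit):

    #include <string>

    // NUL-terminate the PTX, then pad with NULs until its size is a multiple
    // of 8, mirroring the loop at the end of GeneratePTX().
    std::string padPTX(std::string PTX) {
      PTX += '\0';
      while (PTX.size() % 8)
        PTX += '\0';
      return PTX;
    }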

clang/lib/Interpreter/DeviceOffload.h

Lines changed: 51 additions & 0 deletions

@@ -0,0 +1,51 @@
+//===----------- DeviceOffload.h - Device Offloading ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes required for offloading to CUDA devices.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_INTERPRETER_DEVICE_OFFLOAD_H
+#define LLVM_CLANG_LIB_INTERPRETER_DEVICE_OFFLOAD_H
+
+#include "IncrementalParser.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+namespace clang {
+
+class IncrementalCUDADeviceParser : public IncrementalParser {
+public:
+  IncrementalCUDADeviceParser(
+      Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
+      IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
+      llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
+      llvm::Error &Err);
+
+  llvm::Expected<PartialTranslationUnit &>
+  Parse(llvm::StringRef Input) override;
+
+  // Generate PTX for the last PTU
+  llvm::Expected<llvm::StringRef> GeneratePTX();
+
+  // Generate fatbinary contents in memory
+  llvm::Error GenerateFatbinary();
+
+  ~IncrementalCUDADeviceParser();
+
+protected:
+  IncrementalParser &HostParser;
+  int SMVersion;
+  llvm::SmallString<1024> PTXCode;
+  llvm::SmallVector<char, 1024> FatbinContent;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS;
+};
+
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_INTERPRETER_DEVICE_OFFLOAD_H
