Skip to content

Commit 6efc92e

Browse files
SC llvm teamSC llvm team
authored andcommitted
Merged main:e3f5a1bfc58b into origin/amd-gfx:6b6e30e6b6dc
Local branch origin/amd-gfx 6b6e30e Merged main:74f69c49fed8 into origin/amd-gfx:c4e1c89c4f41 Remote branch main e3f5a1b [LLDB][NFC]Also includes the error in log msg. (llvm#134922)
2 parents 6b6e30e + e3f5a1b commit 6efc92e

File tree

647 files changed

+1153
-71193
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

647 files changed

+1153
-71193
lines changed

clang/include/clang/AST/OpenACCClause.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class OpenACCClause {
3838
OpenACCClauseKind getClauseKind() const { return Kind; }
3939
SourceLocation getBeginLoc() const { return Location.getBegin(); }
4040
SourceLocation getEndLoc() const { return Location.getEnd(); }
41+
SourceRange getSourceRange() const { return Location; }
4142

4243
static bool classof(const OpenACCClause *) { return true; }
4344

clang/lib/AST/ByteCode/Compiler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6823,7 +6823,7 @@ bool Compiler<Emitter>::emitDestruction(const Descriptor *Desc,
68236823
return true;
68246824
}
68256825

6826-
if (size_t N = Desc->getNumElems()) {
6826+
if (unsigned N = Desc->getNumElems()) {
68276827
for (ssize_t I = N - 1; I >= 0; --I) {
68286828
if (!this->emitConstUint64(I, Loc))
68296829
return false;

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,16 @@ class CIRGenFunction : public CIRGenTypeCache {
570570
//===--------------------------------------------------------------------===//
571571
// OpenACC Emission
572572
//===--------------------------------------------------------------------===//
573+
private:
574+
// Function to do the basic implementation of a 'compute' operation, including
575+
// the clauses/etc. This might be generalizable in the future to work for
576+
// other constructs, or at least be the base for construct emission.
577+
template <typename Op, typename TermOp>
578+
mlir::LogicalResult
579+
emitOpenACCComputeOp(mlir::Location start, mlir::Location end,
580+
llvm::ArrayRef<const OpenACCClause *> clauses,
581+
const Stmt *structuredBlock);
582+
573583
public:
574584
mlir::LogicalResult
575585
emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s);

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,79 @@
1212

1313
#include "CIRGenBuilder.h"
1414
#include "CIRGenFunction.h"
15+
#include "clang/AST/OpenACCClause.h"
1516
#include "clang/AST/StmtOpenACC.h"
1617

18+
#include "mlir/Dialect/OpenACC/OpenACC.h"
19+
1720
using namespace clang;
1821
using namespace clang::CIRGen;
1922
using namespace cir;
23+
using namespace mlir::acc;
24+
25+
namespace {
26+
class OpenACCClauseCIREmitter final
27+
: public OpenACCClauseVisitor<OpenACCClauseCIREmitter> {
28+
CIRGenModule &cgm;
29+
30+
void clauseNotImplemented(const OpenACCClause &c) {
31+
cgm.errorNYI(c.getSourceRange(), "OpenACC Clause", c.getClauseKind());
32+
}
33+
34+
public:
35+
OpenACCClauseCIREmitter(CIRGenModule &cgm) : cgm(cgm) {}
36+
37+
#define VISIT_CLAUSE(CN) \
38+
void Visit##CN##Clause(const OpenACC##CN##Clause &clause) { \
39+
clauseNotImplemented(clause); \
40+
}
41+
#include "clang/Basic/OpenACCClauses.def"
42+
};
43+
} // namespace
44+
45+
template <typename Op, typename TermOp>
46+
mlir::LogicalResult CIRGenFunction::emitOpenACCComputeOp(
47+
mlir::Location start, mlir::Location end,
48+
llvm::ArrayRef<const OpenACCClause *> clauses,
49+
const Stmt *structuredBlock) {
50+
mlir::LogicalResult res = mlir::success();
51+
52+
OpenACCClauseCIREmitter clauseEmitter(getCIRGenModule());
53+
clauseEmitter.VisitClauseList(clauses);
54+
55+
llvm::SmallVector<mlir::Type> retTy;
56+
llvm::SmallVector<mlir::Value> operands;
57+
auto op = builder.create<Op>(start, retTy, operands);
58+
59+
mlir::Block &block = op.getRegion().emplaceBlock();
60+
mlir::OpBuilder::InsertionGuard guardCase(builder);
61+
builder.setInsertionPointToEnd(&block);
62+
63+
LexicalScope ls{*this, start, builder.getInsertionBlock()};
64+
res = emitStmt(structuredBlock, /*useCurrentScope=*/true);
65+
66+
builder.create<TermOp>(end);
67+
return res;
68+
}
2069

2170
mlir::LogicalResult
2271
CIRGenFunction::emitOpenACCComputeConstruct(const OpenACCComputeConstruct &s) {
23-
getCIRGenModule().errorNYI(s.getSourceRange(), "OpenACC Compute Construct");
24-
return mlir::failure();
72+
mlir::Location start = getLoc(s.getSourceRange().getEnd());
73+
mlir::Location end = getLoc(s.getSourceRange().getEnd());
74+
75+
switch (s.getDirectiveKind()) {
76+
case OpenACCDirectiveKind::Parallel:
77+
return emitOpenACCComputeOp<ParallelOp, mlir::acc::YieldOp>(
78+
start, end, s.clauses(), s.getStructuredBlock());
79+
case OpenACCDirectiveKind::Serial:
80+
return emitOpenACCComputeOp<SerialOp, mlir::acc::YieldOp>(
81+
start, end, s.clauses(), s.getStructuredBlock());
82+
case OpenACCDirectiveKind::Kernels:
83+
return emitOpenACCComputeOp<KernelsOp, mlir::acc::TerminatorOp>(
84+
start, end, s.clauses(), s.getStructuredBlock());
85+
default:
86+
llvm_unreachable("invalid compute construct kind");
87+
}
2588
}
2689

2790
mlir::LogicalResult

clang/lib/CIR/CodeGen/CIRGenerator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "CIRGenModule.h"
1414

15+
#include "mlir/Dialect/OpenACC/OpenACC.h"
1516
#include "mlir/IR/MLIRContext.h"
1617

1718
#include "clang/AST/DeclGroup.h"
@@ -36,6 +37,7 @@ void CIRGenerator::Initialize(ASTContext &astContext) {
3637

3738
mlirContext = std::make_unique<mlir::MLIRContext>();
3839
mlirContext->loadDialect<cir::CIRDialect>();
40+
mlirContext->getOrLoadDialect<mlir::acc::OpenACCDialect>();
3941
cgm = std::make_unique<clang::CIRGen::CIRGenModule>(
4042
*mlirContext.get(), astContext, codeGenOpts, diags);
4143
}

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,8 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
12811281
return nullptr;
12821282
}
12831283
if (CGM.getLangOpts().OffloadViaLLVM ||
1284-
(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
1284+
(CGM.getLangOpts().OffloadingNewDriver &&
1285+
(CGM.getLangOpts().HIP || RelocatableDeviceCode)))
12851286
createOffloadingEntries();
12861287
else
12871288
return makeModuleCtorFunction();

clang/lib/Driver/Driver.cpp

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4405,6 +4405,10 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
44054405
options::OPT_no_offload_new_driver,
44064406
C.isOffloadingHostKind(Action::OFK_Cuda));
44074407

4408+
bool HIPNoRDC =
4409+
C.isOffloadingHostKind(Action::OFK_HIP) &&
4410+
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
4411+
44084412
// Builder to be used to build offloading actions.
44094413
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
44104414
!UseNewOffloadingDriver
@@ -4538,7 +4542,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
45384542
// Check if this Linker Job should emit a static library.
45394543
if (ShouldEmitStaticLibrary(Args)) {
45404544
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
4541-
} else if (UseNewOffloadingDriver ||
4545+
} else if ((UseNewOffloadingDriver && !HIPNoRDC) ||
45424546
Args.hasArg(options::OPT_offload_link)) {
45434547
LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image);
45444548
LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
@@ -4849,10 +4853,28 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
48494853
const InputTy &Input, StringRef CUID,
48504854
Action *HostAction) const {
48514855
// Don't build offloading actions if explicitly disabled or we do not have a
4852-
// valid source input and compile action to embed it in. If preprocessing only
4853-
// ignore embedding.
4854-
if (offloadHostOnly() || !types::isSrcFile(Input.first) ||
4855-
!(isa<CompileJobAction>(HostAction) ||
4856+
// valid source input.
4857+
if (offloadHostOnly() || !types::isSrcFile(Input.first))
4858+
return HostAction;
4859+
4860+
bool HIPNoRDC =
4861+
C.isOffloadingHostKind(Action::OFK_HIP) &&
4862+
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
4863+
4864+
// For HIP non-rdc non-device-only compilation, create a linker wrapper
4865+
// action for each host object to link, bundle and wrap device files in
4866+
// it.
4867+
if (isa<AssembleJobAction>(HostAction) && HIPNoRDC && !offloadDeviceOnly()) {
4868+
ActionList AL{HostAction};
4869+
HostAction = C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_Object);
4870+
HostAction->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
4871+
/*BoundArch=*/nullptr);
4872+
return HostAction;
4873+
}
4874+
4875+
// Don't build offloading actions if we do not have a compile action. If
4876+
// preprocessing only ignore embedding.
4877+
if (!(isa<CompileJobAction>(HostAction) ||
48564878
getFinalPhase(Args) == phases::Preprocess))
48574879
return HostAction;
48584880

@@ -4948,12 +4970,12 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49484970
}
49494971
}
49504972

4951-
// Compiling HIP in non-RDC mode requires linking each action individually.
4973+
// Compiling HIP in device-only non-RDC mode requires linking each action
4974+
// individually.
49524975
for (Action *&A : DeviceActions) {
49534976
if ((A->getType() != types::TY_Object &&
49544977
A->getType() != types::TY_LTO_BC) ||
4955-
Kind != Action::OFK_HIP ||
4956-
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
4978+
!HIPNoRDC || !offloadDeviceOnly())
49574979
continue;
49584980
ActionList LinkerInput = {A};
49594981
A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image);
@@ -4977,12 +4999,12 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49774999
}
49785000
}
49795001

4980-
// HIP code in non-RDC mode will bundle the output if it invoked the linker.
5002+
// HIP code in device-only non-RDC mode will bundle the output if it invoked
5003+
// the linker.
49815004
bool ShouldBundleHIP =
4982-
C.isOffloadingHostKind(Action::OFK_HIP) &&
5005+
HIPNoRDC && offloadDeviceOnly() &&
49835006
Args.hasFlag(options::OPT_gpu_bundle_output,
49845007
options::OPT_no_gpu_bundle_output, true) &&
4985-
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) &&
49865008
!llvm::any_of(OffloadActions,
49875009
[](Action *A) { return A->getType() != types::TY_Image; });
49885010

@@ -5002,11 +5024,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
50025024
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN);
50035025
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
50045026
nullptr, Action::OFK_Cuda);
5005-
} else if (C.isOffloadingHostKind(Action::OFK_HIP) &&
5006-
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
5007-
false)) {
5008-
// If we are not in RDC-mode we just emit the final HIP fatbinary for each
5009-
// translation unit, linking each input individually.
5027+
} else if (HIPNoRDC && offloadDeviceOnly()) {
5028+
// If we are in device-only non-RDC-mode we just emit the final HIP
5029+
// fatbinary for each translation unit, linking each input individually.
50105030
Action *FatbinAction =
50115031
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN);
50125032
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(),
@@ -5159,8 +5179,11 @@ Action *Driver::ConstructPhaseAction(
51595179
(((Input->getOffloadingToolChain() &&
51605180
Input->getOffloadingToolChain()->getTriple().isAMDGPU()) ||
51615181
TargetDeviceOffloadKind == Action::OFK_HIP) &&
5162-
(Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
5163-
false) ||
5182+
((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
5183+
false) ||
5184+
(Args.hasFlag(options::OPT_offload_new_driver,
5185+
options::OPT_no_offload_new_driver, false) &&
5186+
!offloadDeviceOnly())) ||
51645187
TargetDeviceOffloadKind == Action::OFK_OpenMP))) {
51655188
types::ID Output =
51665189
Args.hasArg(options::OPT_S) &&

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7832,7 +7832,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
78327832
CmdArgs.push_back("-fcuda-include-gpubinary");
78337833
CmdArgs.push_back(CudaDeviceInput->getFilename());
78347834
} else if (!HostOffloadingInputs.empty()) {
7835-
if ((IsCuda || IsHIP) && !IsRDCMode) {
7835+
if (IsCuda && !IsRDCMode) {
78367836
assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
78377837
CmdArgs.push_back("-fcuda-include-gpubinary");
78387838
CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
@@ -9344,11 +9344,22 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
93449344
// Add the linker arguments to be forwarded by the wrapper.
93459345
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") +
93469346
LinkCommand->getExecutable()));
9347-
for (const char *LinkArg : LinkCommand->getArguments())
9348-
CmdArgs.push_back(LinkArg);
93499347

9350-
addOffloadCompressArgs(Args, CmdArgs);
9348+
// We use action type to differentiate two use cases of the linker wrapper.
9349+
// TY_Image for normal linker wrapper work.
9350+
// TY_Object for HIP fno-gpu-rdc embedding device binary in a relocatable
9351+
// object.
9352+
assert(JA.getType() == types::TY_Object || JA.getType() == types::TY_Image);
9353+
if (JA.getType() == types::TY_Object) {
9354+
CmdArgs.append({"-o", Output.getFilename()});
9355+
for (auto Input : Inputs)
9356+
CmdArgs.push_back(Input.getFilename());
9357+
CmdArgs.push_back("-r");
9358+
} else
9359+
for (const char *LinkArg : LinkCommand->getArguments())
9360+
CmdArgs.push_back(LinkArg);
93519361

9362+
addOffloadCompressArgs(Args, CmdArgs);
93529363
const char *Exec =
93539364
Args.MakeArgString(getToolChain().GetProgramPath("clang-linker-wrapper"));
93549365

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
2+
3+
void acc_kernels(void) {
4+
// CHECK: cir.func @acc_kernels() {
5+
#pragma acc kernels
6+
{}
7+
8+
// CHECK-NEXT: acc.kernels {
9+
// CHECK-NEXT:acc.terminator
10+
// CHECK-NEXT:}
11+
12+
#pragma acc kernels
13+
while(1){}
14+
// CHECK-NEXT: acc.kernels {
15+
// CHECK-NEXT: cir.scope {
16+
// CHECK-NEXT: cir.while {
17+
// CHECK-NEXT: %[[INT:.*]] = cir.const #cir.int<1>
18+
// CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[INT]] :
19+
// CHECK-NEXT: cir.condition(%[[CAST]])
20+
// CHECK-NEXT: } do {
21+
// CHECK-NEXT: cir.yield
22+
// cir.while do end:
23+
// CHECK-NEXT: }
24+
// cir.scope end:
25+
// CHECK-NEXT: }
26+
// CHECK-NEXT:acc.terminator
27+
// CHECK-NEXT:}
28+
29+
// CHECK-NEXT: cir.return
30+
}

clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
void HelloWorld(int *A, int *B, int *C, int N) {
55

6-
// expected-error@+2{{ClangIR code gen Not Yet Implemented: OpenACC Compute Construct}}
6+
// expected-error@+2{{ClangIR code gen Not Yet Implemented: OpenACC Combined Construct}}
77
// expected-error@+1{{ClangIR code gen Not Yet Implemented: statement}}
8-
#pragma acc parallel
8+
#pragma acc parallel loop
99
for (unsigned I = 0; I < N; ++I)
1010
A[I] = B[I] + C[I];
1111

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
2+
3+
void acc_parallel(void) {
4+
// CHECK: cir.func @acc_parallel() {
5+
#pragma acc parallel
6+
{}
7+
// CHECK-NEXT: acc.parallel {
8+
// CHECK-NEXT:acc.yield
9+
// CHECK-NEXT:}
10+
11+
#pragma acc parallel
12+
while(1){}
13+
// CHECK-NEXT: acc.parallel {
14+
// CHECK-NEXT: cir.scope {
15+
// CHECK-NEXT: cir.while {
16+
// CHECK-NEXT: %[[INT:.*]] = cir.const #cir.int<1>
17+
// CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[INT]] :
18+
// CHECK-NEXT: cir.condition(%[[CAST]])
19+
// CHECK-NEXT: } do {
20+
// CHECK-NEXT: cir.yield
21+
// cir.while do end:
22+
// CHECK-NEXT: }
23+
// cir.scope end:
24+
// CHECK-NEXT: }
25+
// CHECK-NEXT:acc.yield
26+
// CHECK-NEXT:}
27+
28+
// CHECK-NEXT: cir.return
29+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
2+
3+
void acc_serial(void) {
4+
// CHECK: cir.func @acc_serial() {
5+
#pragma acc serial
6+
{}
7+
8+
// CHECK-NEXT: acc.serial {
9+
// CHECK-NEXT:acc.yield
10+
// CHECK-NEXT:}
11+
12+
#pragma acc serial
13+
while(1){}
14+
// CHECK-NEXT: acc.serial {
15+
// CHECK-NEXT: cir.scope {
16+
// CHECK-NEXT: cir.while {
17+
// CHECK-NEXT: %[[INT:.*]] = cir.const #cir.int<1>
18+
// CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[INT]] :
19+
// CHECK-NEXT: cir.condition(%[[CAST]])
20+
// CHECK-NEXT: } do {
21+
// CHECK-NEXT: cir.yield
22+
// cir.while do end:
23+
// CHECK-NEXT: }
24+
// cir.scope end:
25+
// CHECK-NEXT: }
26+
// CHECK-NEXT:acc.yield
27+
// CHECK-NEXT:}
28+
29+
// CHECK-NEXT: cir.return
30+
}

0 commit comments

Comments
 (0)