Skip to content

Commit a8ac930

Browse files
[Flang] Add code-object-version option (#72638)
The code object version can be configured by the user for the AMD GPU target, and it needs to be placed in the LLVM IR generated by Flang. The code object version information in the MLIR generated by the parser can be reused by other tools, so there is no need to specify extra flags when invoking MLIR tools (like fir-opt) separately.
1 parent 02cbae4 commit a8ac930

File tree

15 files changed

+146
-21
lines changed

15 files changed

+146
-21
lines changed

clang/include/clang/Basic/TargetOptions.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,9 @@ class TargetOptions {
7878
/// \brief If enabled, allow AMDGPU unsafe floating point atomics.
7979
bool AllowAMDGPUUnsafeFPAtomics = false;
8080

81-
/// \brief Enumeration value for AMDGPU code object version, which is the
82-
/// code object version times 100.
83-
enum CodeObjectVersionKind {
84-
COV_None,
85-
COV_2 = 200, // Unsupported.
86-
COV_3 = 300, // Unsupported.
87-
COV_4 = 400,
88-
COV_5 = 500,
89-
};
9081
/// \brief Code object version for AMDGPU.
91-
CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
82+
llvm::CodeObjectVersionKind CodeObjectVersion =
83+
llvm::CodeObjectVersionKind::COV_None;
9284

9385
/// \brief Enumeration values for AMDGPU printf lowering scheme
9486
enum class AMDGPUPrintfKind {

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4721,9 +4721,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
47214721

47224722
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
47234723
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
4724-
Visibility<[ClangOption, CC1Option]>,
4724+
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
47254725
Values<"none,4,5">,
4726-
NormalizedValuesScope<"TargetOptions">,
4726+
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
47274727
NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
47284728
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
47294729

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17588,7 +17588,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
1758817588

1758917589
auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
1759017590

17591-
if (Cov == clang::TargetOptions::COV_None) {
17591+
if (Cov == CodeObjectVersionKind::COV_None) {
1759217592
StringRef Name = "__oclc_ABI_version";
1759317593
auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
1759417594
if (!ABIVersionC)
@@ -17606,7 +17606,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
1760617606

1760717607
Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
1760817608
ABIVersion,
17609-
llvm::ConstantInt::get(CGF.Int32Ty, clang::TargetOptions::COV_5));
17609+
llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
1761017610

1761117611
// Indexing the implicit kernarg segment.
1761217612
Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
@@ -17621,7 +17621,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
1762117621
Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
1762217622
} else {
1762317623
Value *GEP = nullptr;
17624-
if (Cov == clang::TargetOptions::COV_5) {
17624+
if (Cov == CodeObjectVersionKind::COV_5) {
1762517625
// Indexing the implicit kernarg segment.
1762617626
GEP = CGF.Builder.CreateConstGEP1_32(
1762717627
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -847,7 +847,7 @@ void CodeGenModule::Release() {
847847
// Emit amdgpu_code_object_version module flag, which is code object version
848848
// times 100.
849849
if (getTarget().getTargetOpts().CodeObjectVersion !=
850-
TargetOptions::COV_None) {
850+
llvm::CodeObjectVersionKind::COV_None) {
851851
getModule().addModuleFlag(llvm::Module::Error,
852852
"amdgpu_code_object_version",
853853
getTarget().getTargetOpts().CodeObjectVersion);

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
368368
return;
369369

370370
if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
371-
clang::TargetOptions::COV_None)
371+
llvm::CodeObjectVersionKind::COV_None)
372372
return;
373373

374374
auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,14 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args,
277277
}
278278
}
279279

280+
void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
281+
ArgStringList &CmdArgs) const {
282+
if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
283+
StringRef Val = A->getValue();
284+
CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
285+
}
286+
}
287+
280288
void Flang::addTargetOptions(const ArgList &Args,
281289
ArgStringList &CmdArgs) const {
282290
const ToolChain &TC = getToolChain();
@@ -300,6 +308,9 @@ void Flang::addTargetOptions(const ArgList &Args,
300308

301309
case llvm::Triple::r600:
302310
case llvm::Triple::amdgcn:
311+
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
312+
AddAMDGPUTargetArgs(Args, CmdArgs);
313+
break;
303314
case llvm::Triple::riscv64:
304315
case llvm::Triple::x86_64:
305316
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);

clang/lib/Driver/ToolChains/Flang.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
6363
void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
6464
llvm::opt::ArgStringList &CmdArgs) const;
6565

66+
/// Add specific options for AMDGPU target.
67+
///
68+
/// \param [in] Args The list of input driver arguments
69+
/// \param [out] CmdArgs The list of output command arguments
70+
void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
71+
llvm::opt::ArgStringList &CmdArgs) const;
72+
6673
/// Extract offload options from the driver arguments and add them to
6774
/// the command arguments.
6875
/// \param [in] C The current compilation for the driver invocation

flang/include/flang/Frontend/CodeGenOptions.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
8585
RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
8686
};
8787

88+
/// \brief Code object version for AMDGPU.
89+
llvm::CodeObjectVersionKind CodeObjectVersion =
90+
llvm::CodeObjectVersionKind::COV_4;
91+
8892
/// Optimization remark with an optional regular expression pattern.
8993
struct OptRemark {
9094
RemarkKind Kind = RemarkKind::RK_Missing;

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,17 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
268268
opts.PrepareForThinLTO = true;
269269
}
270270

271+
if (const llvm::opt::Arg *a = args.getLastArg(
272+
clang::driver::options::OPT_mcode_object_version_EQ)) {
273+
llvm::StringRef s = a->getValue();
274+
if (s == "5")
275+
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5;
276+
if (s == "4")
277+
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_4;
278+
if (s == "none")
279+
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_None;
280+
}
281+
271282
// -f[no-]save-optimization-record[=<format>]
272283
if (const llvm::opt::Arg *a =
273284
args.getLastArg(clang::driver::options::OPT_opt_record_file))

flang/lib/Frontend/FrontendActions.cpp

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,7 @@ static void setMLIRDataLayout(mlir::ModuleOp &mlirModule,
244244
mlirModule->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec);
245245
}
246246

247-
static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
248-
CompilerInstance &ci) {
247+
static void addDependentLibs(mlir::ModuleOp &mlirModule, CompilerInstance &ci) {
249248
const std::vector<std::string> &libs =
250249
ci.getInvocation().getCodeGenOpts().DependentLibs;
251250
if (libs.empty()) {
@@ -264,6 +263,68 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
264263
}
265264
}
266265

266+
// Add to MLIR code target specific items which are dependent on target
267+
// configuration specified by the user.
268+
// Clang equivalent function: AMDGPUTargetCodeGenInfo::emitTargetGlobals
269+
static void addAMDGPUSpecificMLIRItems(mlir::ModuleOp &mlirModule,
270+
CompilerInstance &ci) {
271+
const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
272+
const llvm::Triple triple(targetOpts.triple);
273+
const llvm::StringRef codeObjectVersionGlobalOpName = "__oclc_ABI_version";
274+
275+
// TODO: Share address spaces enumeration between Clang and Flang.
276+
// Currently this enumeration is defined in Clang specific class
277+
// defined in file: clang/lib/Basic/Targets/AMDGPU.h,
278+
// and we need to move it to LLVM directory.
279+
const int constantAddressSpace = 4;
280+
281+
if (!triple.isAMDGPU()) {
282+
return;
283+
}
284+
const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
285+
if (codeGenOpts.CodeObjectVersion == llvm::CodeObjectVersionKind::COV_None) {
286+
return;
287+
}
288+
289+
mlir::ConversionPatternRewriter builder(mlirModule.getContext());
290+
unsigned oclcABIVERsion = codeGenOpts.CodeObjectVersion;
291+
auto int32Type = builder.getI32Type();
292+
293+
std::optional<mlir::LLVM::GlobalOp> originalGV;
294+
295+
mlirModule.walk([&originalGV, codeObjectVersionGlobalOpName](
296+
mlir::LLVM::GlobalOp globalOp) {
297+
if (globalOp.getName() == codeObjectVersionGlobalOpName)
298+
originalGV = globalOp;
299+
});
300+
if (originalGV.has_value()) {
301+
mlir::LLVM::GlobalOp originalGVOp = originalGV.value();
302+
if (originalGVOp.getLinkage() != mlir::LLVM::Linkage::External) {
303+
return;
304+
}
305+
// Update the variable if it is already present in MLIR but it was marked
306+
// as external linkage variable
307+
originalGVOp.setLinkage(mlir::LLVM::Linkage::WeakODR);
308+
originalGVOp.setValueAttr(
309+
builder.getIntegerAttr(int32Type, oclcABIVERsion));
310+
originalGVOp.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
311+
originalGVOp.setAddrSpace(constantAddressSpace);
312+
originalGVOp.setVisibility_(mlir::LLVM::Visibility::Hidden);
313+
return;
314+
}
315+
316+
mlir::LLVM::GlobalOp covInfo = builder.create<mlir::LLVM::GlobalOp>(
317+
/* Location */ mlirModule.getLoc(), /* Type */ int32Type,
318+
/* IsConstant */ true, /* Linkage */ mlir::LLVM::Linkage::WeakODR,
319+
/* Name */ codeObjectVersionGlobalOpName,
320+
/* Value */ builder.getIntegerAttr(int32Type, oclcABIVERsion));
321+
covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
322+
covInfo.setAddrSpace(constantAddressSpace);
323+
covInfo.setVisibility_(mlir::LLVM::Visibility::Hidden);
324+
builder.setInsertionPointToStart(mlirModule.getBody());
325+
builder.insert(covInfo);
326+
}
327+
267328
bool CodeGenAction::beginSourceFileAction() {
268329
llvmCtx = std::make_unique<llvm::LLVMContext>();
269330
CompilerInstance &ci = this->getInstance();
@@ -365,8 +426,10 @@ bool CodeGenAction::beginSourceFileAction() {
365426
Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()};
366427
lb.lower(parseTree, ci.getInvocation().getSemanticsContext());
367428

368-
// Add dependent libraries
369-
addDepdendentLibs(*mlirModule, ci);
429+
// Add target specific items like dependent libraries, target specific
430+
// constants etc.
431+
addDependentLibs(*mlirModule, ci);
432+
addAMDGPUSpecificMLIRItems(*mlirModule, ci);
370433

371434
// run the default passes.
372435
mlir::PassManager pm((*mlirModule)->getName(),
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
! RUN: not %flang -target amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=3 -S %s -o \
2+
! RUN: /dev/null 2>&1 | FileCheck --check-prefix=INVALID_VERSION %s
3+
4+
! RUN: %flang -target x86_64-unknown-linux-gnu -mcode-object-version=3 -S %s -o \
5+
! RUN: /dev/null 2>&1 | FileCheck --check-prefix=UNUSED_PARAM %s
6+
7+
! INVALID_VERSION: error: invalid integral value '3' in '-mcode-object-version=3'
8+
! UNUSED_PARAM: warning: argument unused during compilation: '-mcode-object-version=3' [-Wunused-command-line-argument]

flang/test/Driver/driver-help-hidden.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@
114114
! CHECK-NEXT: -I <dir> Add directory to the end of the list of include search paths
115115
! CHECK-NEXT: -L <dir> Add directory to library search path
116116
! CHECK-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help'
117+
! CHECK-NEXT: -mcode-object-version=<value>
118+
! CHECK-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
117119
! CHECK-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help'
118120
! CHECK-NEXT: -mllvm=<arg> Alias for -mllvm
119121
! CHECK-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing

flang/test/Driver/driver-help.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@
100100
! HELP-NEXT: -I <dir> Add directory to the end of the list of include search paths
101101
! HELP-NEXT: -L <dir> Add directory to library search path
102102
! HELP-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help'
103+
! HELP-NEXT: -mcode-object-version=<value>
104+
! HELP-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
103105
! HELP-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help'
104106
! HELP-NEXT: -mllvm=<arg> Alias for -mllvm
105107
! HELP-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
@@ -232,6 +234,8 @@
232234
! HELP-FC1-NEXT: -init-only Only execute frontend initialization
233235
! HELP-FC1-NEXT: -I <dir> Add directory to the end of the list of include search paths
234236
! HELP-FC1-NEXT: -load <dsopath> Load the named plugin (dynamic shared object)
237+
! HELP-FC1-NEXT: -mcode-object-version=<value>
238+
! HELP-FC1-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
235239
! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities.
236240
! HELP-FC1-NEXT: -menable-no-nans Allow optimization to assume there are no NaNs.
237241
! HELP-FC1-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
!REQUIRES: amdgpu-registered-target
2+
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 %s -o - | FileCheck --check-prefix=COV_DEFAULT %s
3+
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s
4+
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
5+
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
6+
7+
!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
8+
!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
9+
!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
10+
!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
11+
subroutine target_simple
12+
end subroutine target_simple
13+

llvm/include/llvm/Target/TargetOptions.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,16 @@ namespace llvm {
121121
Never,
122122
};
123123

124+
/// \brief Enumeration value for AMDGPU code object version, which is the
125+
/// code object version times 100.
126+
enum CodeObjectVersionKind {
127+
COV_None,
128+
COV_2 = 200, // Unsupported.
129+
COV_3 = 300, // Unsupported.
130+
COV_4 = 400,
131+
COV_5 = 500,
132+
};
133+
124134
class TargetOptions {
125135
public:
126136
TargetOptions()

0 commit comments

Comments
 (0)