Skip to content

Commit a8fe4a5

Browse files
[SYCL][CUDA] Adds PI CUDA support for reqd_work_group_size attribute (#3735)
This commit adds support for reqd_work_group_size in the PI CUDA backend by extracting the attribute as program metadata. The program metadata accompanies the binary when it is passed to the backend, and it is up to the backend whether to extract any useful metadata from it. This adds two additional parameters to piProgramCreateWithBinary for passing the program metadata. Program metadata is transported as properties created by sycl-post-link, so this commit also changes the behaviour of the NVPTX path for linkage actions leading to the offload wrapper. These changes use file tables for the NVPTX path as well, to allow generation and preservation of properties. This assumes that the file table only ever contains a single row when taking the NVPTX path and will fail otherwise. Signed-off-by: Steffen Larsen <[email protected]>
1 parent f7aa2bf commit a8fe4a5

File tree

32 files changed

+2031
-619
lines changed

32 files changed

+2031
-619
lines changed

clang/include/clang/Driver/Action.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,14 @@ class FileTableTformJobAction : public JobAction {
772772

773773
public:
774774
struct Tform {
775-
enum Kind { EXTRACT, EXTRACT_DROP_TITLE, REPLACE, RENAME };
775+
enum Kind {
776+
EXTRACT,
777+
EXTRACT_DROP_TITLE,
778+
REPLACE,
779+
REPLACE_CELL,
780+
RENAME,
781+
COPY_SINGLE_FILE
782+
};
776783

777784
Tform() = default;
778785
Tform(Kind K, std::initializer_list<StringRef> Args) : TheKind(K) {
@@ -794,10 +801,19 @@ class FileTableTformJobAction : public JobAction {
794801
// <To> from another file table passed as input to this action.
795802
void addReplaceColumnTform(StringRef From, StringRef To);
796803

804+
// Replaces a cell in this table with column title <ColumnName> and row <Row>
805+
// with the file name passed as input to this action.
806+
void addReplaceCellTform(StringRef ColumnName, int Row);
807+
797808
// Renames a column with title <From> in this table with a column with title
798809
// <To> passed as input to this action.
799810
void addRenameColumnTform(StringRef From, StringRef To);
800811

812+
// Specifies that, instead of generating a new table, the transformation
813+
// should copy the file at column <ColumnName> and row <Row> into the
814+
// output file.
815+
void addCopySingleFileTform(StringRef ColumnName, int Row);
816+
801817
static bool classof(const Action *A) {
802818
return A->getKind() == FileTableTformJobClass;
803819
}
@@ -806,6 +822,9 @@ class FileTableTformJobAction : public JobAction {
806822

807823
private:
808824
SmallVector<Tform, 2> Tforms; // transformation actions requested
825+
826+
// column to copy single file from if requested
827+
std::string CopySingleFileColumnName;
809828
};
810829

811830
class AppendFooterJobAction : public JobAction {

clang/lib/Driver/Action.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,11 +507,23 @@ void FileTableTformJobAction::addReplaceColumnTform(StringRef From,
507507
Tforms.emplace_back(Tform(Tform::REPLACE, {From, To}));
508508
}
509509

510+
// Queues a REPLACE_CELL transformation. Its two arguments are the column
// title and the row index rendered as a decimal string.
void FileTableTformJobAction::addReplaceCellTform(StringRef ColumnName,
                                                  int Row) {
  const std::string RowText = std::to_string(Row);
  Tforms.push_back(Tform(Tform::REPLACE_CELL, {ColumnName, RowText}));
}
515+
510516
// Queues a RENAME transformation whose arguments are the current column
// title <From> followed by the new title <To>.
void FileTableTformJobAction::addRenameColumnTform(StringRef From,
                                                   StringRef To) {
  Tforms.push_back(Tform(Tform::RENAME, {From, To}));
}
514520

521+
// Queues a COPY_SINGLE_FILE transformation. Its two arguments are the column
// title and the row index rendered as a decimal string.
void FileTableTformJobAction::addCopySingleFileTform(StringRef ColumnName,
                                                     int Row) {
  const std::string RowText = std::to_string(Row);
  Tforms.push_back(Tform(Tform::COPY_SINGLE_FILE, {ColumnName, RowText}));
}
526+
515527
void AppendFooterJobAction::anchor() {}
516528

517529
AppendFooterJobAction::AppendFooterJobAction(Action *Input, types::ID Type)

clang/lib/Driver/Driver.cpp

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4378,33 +4378,33 @@ class OffloadingActionBuilder final {
43784378
// .--------------------------------------.
43794379
// | PostLink |
43804380
// .--------------------------------------.
4381-
// [.n] [+*] [+*]
4381+
// [+n] [+*] [+]
43824382
// | | |
4383-
// | .-----------------. |
4384-
// | | FileTableTform | |
4385-
// | | (extract "Code")| |
4386-
// | .-----------------. |
4387-
// | [-] |
4383+
// .----------------. .-----------------. |
4384+
// | FileTableTform | | FileTableTform | |
4385+
// | (copy "Code") | | (extract "Code")| |
4386+
// .----------------. .-----------------. |
4387+
// [.] [-] |
43884388
// | | |
4389-
// | [-*] |
4390-
// .-------------. .-------------------. |
4391-
// |finalizeNVPTX| | SPIRVTranslator | |
4392-
// .-------------. .-------------------. |
4393-
// | [-as] [-!a] |
4389+
// [.] [-*] |
4390+
// .---------------. .-------------------. |
4391+
// | finalizeNVPTX | | SPIRVTranslator | |
4392+
// .---------------. .-------------------. |
4393+
// [.] [-as] [-!a] |
43944394
// | | | |
43954395
// | [-s] | |
43964396
// | .----------------. | |
43974397
// | | BackendCompile | | |
43984398
// | .----------------. | |
43994399
// | [-s] | |
44004400
// | | | |
4401-
// | [-a] [-!a] [+]
4402-
// | .--------------------.
4403-
// | | FileTableTform |
4404-
// | | (replace "Code") |
4405-
// | .--------------------.
4406-
// | |
4407-
// [.n] [+*]
4401+
// [.] [-a] [-!a] [+]
4402+
// .------------------------------------.
4403+
// | FileTableTform |
4404+
// | (replace "Code") |
4405+
// .------------------------------------.
4406+
// |
4407+
// [+]
44084408
// .--------------------------------------.
44094409
// | OffloadWrapper |
44104410
// .--------------------------------------.
@@ -4451,24 +4451,40 @@ class OffloadingActionBuilder final {
44514451
ActionList WrapperInputs;
44524452
// post link is not optional - even if not splitting, always need to
44534453
// process specialization constants
4454-
types::ID PostLinkOutType =
4455-
isNVPTX || isAMDGCN ? types::TY_LLVM_BC : types::TY_Tempfiletable;
44564454
auto *PostLinkAction = C.MakeAction<SYCLPostLinkJobAction>(
4457-
FullDeviceLinkAction, PostLinkOutType);
4455+
FullDeviceLinkAction, types::TY_Tempfiletable);
44584456
PostLinkAction->setRTSetsSpecConstants(!isAOT);
44594457

4460-
if (isNVPTX) {
4461-
Action *FinAction =
4462-
finalizeNVPTXDependences(PostLinkAction, (*TC)->getTriple());
4463-
WrapperInputs.push_back(FinAction);
4464-
} else if (isAMDGCN) {
4465-
Action *FinAction =
4466-
finalizeAMDGCNDependences(PostLinkAction, (*TC)->getTriple());
4467-
WrapperInputs.push_back(FinAction);
4458+
constexpr char COL_CODE[] = "Code";
4459+
4460+
if (isNVPTX || isAMDGCN) {
4461+
// Make extraction copy the only remaining code file instead of
4462+
// creating a new table with a single entry.
4463+
// TODO: Process all PTX code files in file table to enable code
4464+
// splitting for PTX target.
4465+
auto *ExtractIRFilesAction = C.MakeAction<FileTableTformJobAction>(
4466+
PostLinkAction, types::TY_LLVM_BC);
4467+
ExtractIRFilesAction->addCopySingleFileTform(COL_CODE, 0);
4468+
4469+
Action *FinAction;
4470+
if (isNVPTX) {
4471+
FinAction = finalizeNVPTXDependences(ExtractIRFilesAction,
4472+
(*TC)->getTriple());
4473+
} else /* isAMDGCN */ {
4474+
FinAction = finalizeAMDGCNDependences(ExtractIRFilesAction,
4475+
(*TC)->getTriple());
4476+
}
4477+
ActionList TformInputs{PostLinkAction, FinAction};
4478+
4479+
// Replace the only code entry in the table, as confirmed by the
4480+
// previous transformation.
4481+
auto *ReplaceFilesAction = C.MakeAction<FileTableTformJobAction>(
4482+
TformInputs, types::TY_Tempfiletable);
4483+
ReplaceFilesAction->addReplaceCellTform(COL_CODE, 0);
4484+
WrapperInputs.push_back(ReplaceFilesAction);
44684485
} else {
44694486
// For SPIRV-based targets - translate to SPIRV then optionally
44704487
// compile ahead-of-time to native architecture
4471-
constexpr char COL_CODE[] = "Code";
44724488
auto *ExtractIRFilesAction = C.MakeAction<FileTableTformJobAction>(
44734489
PostLinkAction, types::TY_Tempfilelist);
44744490
// single column w/o title fits TY_Tempfilelist format
@@ -4513,6 +4529,7 @@ class OffloadingActionBuilder final {
45134529
ReplaceFilesAction->addReplaceColumnTform(COL_CODE, COL_CODE);
45144530
WrapperInputs.push_back(ReplaceFilesAction);
45154531
}
4532+
45164533
// After the Link, wrap the files before the final host link
45174534
auto *DeviceWrappingAction = C.MakeAction<OffloadWrapperJobAction>(
45184535
WrapperInputs, types::TY_Object);

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8860,6 +8860,9 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
88608860
TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization,
88618861
options::OPT_fno_sycl_dead_args_optimization, false))
88628862
addArgs(CmdArgs, TCArgs, {"-emit-param-info"});
8863+
// Enable PI program metadata
8864+
if (getToolChain().getTriple().isNVPTX())
8865+
addArgs(CmdArgs, TCArgs, {"-emit-program-metadata"});
88638866
if (JA.getType() == types::TY_LLVM_BC) {
88648867
// single file output requested - this means only perform necessary IR
88658868
// transformations (like specialization constant intrinsic lowering) and
@@ -8946,6 +8949,15 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA,
89468949
addArgs(CmdArgs, TCArgs, {Arg});
89478950
break;
89488951
}
8952+
case FileTableTformJobAction::Tform::REPLACE_CELL: {
8953+
assert(Tf.TheArgs.size() == 2 && "column name and row id expected");
8954+
SmallString<128> Arg("-replace_cell=");
8955+
Arg += Tf.TheArgs[0];
8956+
Arg += ",";
8957+
Arg += Tf.TheArgs[1];
8958+
addArgs(CmdArgs, TCArgs, {Arg});
8959+
break;
8960+
}
89498961
case FileTableTformJobAction::Tform::RENAME: {
89508962
assert(Tf.TheArgs.size() == 2 && "from/to names expected");
89518963
SmallString<128> Arg("-rename=");
@@ -8955,8 +8967,18 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA,
89558967
addArgs(CmdArgs, TCArgs, {Arg});
89568968
break;
89578969
}
8970+
case FileTableTformJobAction::Tform::COPY_SINGLE_FILE: {
8971+
assert(Tf.TheArgs.size() == 2 && "column name and row id expected");
8972+
SmallString<128> Arg("-copy_single_file=");
8973+
Arg += Tf.TheArgs[0];
8974+
Arg += ",";
8975+
Arg += Tf.TheArgs[1];
8976+
addArgs(CmdArgs, TCArgs, {Arg});
8977+
break;
8978+
}
89588979
}
89598980
}
8981+
89608982
// 2) add output option
89618983
assert(Output.isFilename() && "table tform output must be a file");
89628984
addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()});

clang/test/Driver/sycl-offload-amdgcn.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@
2828
// CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl)
2929
// CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl)
3030
// CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl)
31-
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl)
32-
// CHK-PHASES-NO-CC: 13: backend, {12}, assembler, (device-sycl)
33-
// CHK-PHASES-NO-CC: 14: assembler, {13}, object, (device-sycl)
34-
// CHK-PHASES-NO-CC: 15: linker, {14}, image, (device-sycl)
35-
// CHK-PHASES-NO-CC: 16: linker, {15}, hip-fatbin, (device-sycl)
36-
// CHK-PHASES-NO-CC: 17: clang-offload-wrapper, {16}, object, (device-sycl)
37-
// CHK-PHASES-NO-CC: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {17}, image
31+
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl)
32+
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl)
33+
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl)
34+
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl)
35+
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl)
36+
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl)
37+
// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl)
38+
// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl)
39+
// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {19}, image

clang/test/Driver/sycl-offload-nvptx.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@
2828
// CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl)
2929
// CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl)
3030
// CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl, sm_50)
31-
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50)
32-
// CHK-PHASES-NO-CC: 13: backend, {12}, assembler, (device-sycl, sm_50)
33-
// CHK-PHASES-NO-CC: 14: clang-offload-wrapper, {13}, object, (device-sycl, sm_50)
34-
// CHK-PHASES-NO-CC: 15: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {14}, image
31+
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_50)
32+
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, sm_50)
33+
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, sm_50)
34+
// CHK-PHASES-NO-CC: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_50)
35+
// CHK-PHASES-NO-CC: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_50)
36+
// CHK-PHASES-NO-CC: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {16}, image
3537

3638
/// Check phases specifying a compute capability.
3739
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
@@ -50,7 +52,9 @@
5052
// CHK-PHASES: 9: assembler, {8}, object, (host-sycl)
5153
// CHK-PHASES: 10: linker, {9}, image, (host-sycl)
5254
// CHK-PHASES: 11: linker, {5}, ir, (device-sycl, sm_35)
53-
// CHK-PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_35)
54-
// CHK-PHASES: 13: backend, {12}, assembler, (device-sycl, sm_35)
55-
// CHK-PHASES: 14: clang-offload-wrapper, {13}, object, (device-sycl, sm_35)
56-
// CHK-PHASES: 15: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {14}, image
55+
// CHK-PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_35)
56+
// CHK-PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_35)
57+
// CHK-PHASES: 14: backend, {13}, assembler, (device-sycl, sm_35)
58+
// CHK-PHASES: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_35)
59+
// CHK-PHASES: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_35)
60+
// CHK-PHASES: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {16}, image

llvm/include/llvm/Support/PropertySetIO.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ class PropertySetRegistry {
188188
"SYCL/specialization constants default values";
189189
static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask";
190190
static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt";
191+
static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata";
191192
static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties";
192193
static constexpr char SYCL_ASSERT_USED[] = "SYCL/assert used";
193194

llvm/include/llvm/Support/SimpleTable.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class SimpleTable {
9797
Error replaceColumn(StringRef Name, const SimpleTable &Src,
9898
StringRef SrcName = "");
9999

100+
// Replaces the value in a cell at a given column and row with the new value.
101+
Error updateCellValue(StringRef ColName, int Row, StringRef NewValue);
102+
100103
// Renames a column.
101104
Error renameColumn(StringRef OldName, StringRef NewName);
102105

llvm/lib/Support/PropertySetIO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[];
197197
constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[];
198198
constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[];
199199
constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[];
200+
constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[];
200201
constexpr char PropertySetRegistry::SYCL_MISC_PROP[];
201202
constexpr char PropertySetRegistry::SYCL_ASSERT_USED[];
202203

llvm/lib/Support/SimpleTable.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,16 @@ Error SimpleTable::replaceColumn(StringRef Name, const SimpleTable &Src,
109109
return Error::success();
110110
}
111111

112+
// Replaces the value of the cell at column <ColName> and zero-based row <Row>
// with <NewValue>.
//
// Returns an error without modifying the table when the table has no columns,
// when <Row> lies outside [0, getNumRows()), or when the column lookup yields
// a negative id. Returns success otherwise.
Error SimpleTable::updateCellValue(StringRef ColName, int Row,
                                   StringRef NewValue) {
  if (getNumColumns() == 0)
    return makeError("empty table");
  // The upper bound is exclusive: a row index equal to getNumRows() would
  // already index past the last row.
  if (Row < 0 || Row >= getNumRows())
    return makeError("row index out of bounds");
  // NOTE(review): getColumnId is assumed to report an unknown column with a
  // negative id - confirm against its definition.
  const int Col = getColumnId(ColName);
  if (Col < 0)
    return makeError("column not found");
  Rows[Row][Col] = NewValue.str();
  return Error::success();
}
121+
112122
Error SimpleTable::renameColumn(StringRef OldName, StringRef NewName) {
113123
int I = getColumnId(OldName);
114124

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[A|B|C|D]
2+
aaa|bbb|100|XXX
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- Intermediate and output files use lit's per-test %t prefix, so the test
-- does not write fixed names into the shared working directory.

-- Insert %S/Inputs/gold.txt into column A at row index 0
RUN: file-table-tform --replace_cell=A,0 %S/Inputs/s.txt %S/Inputs/gold.txt -o %t.table

-- Copy file in column A from the only row
RUN: file-table-tform --copy_single_file=A,0 %t.table -o %t.copy

-- Verify result
RUN: diff %t.copy %S/Inputs/gold.txt
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; This test checks that the post-link tool generates SYCL program metadata.
2+
;
3+
; RUN: sycl-post-link -emit-program-metadata -S %s -o %t.files.table
4+
; RUN: FileCheck %s -input-file=%t.files.table --check-prefixes CHECK-TABLE
5+
; RUN: FileCheck %s -input-file=%t.files_0.prop --match-full-lines --check-prefixes CHECK-PROP
6+
7+
target triple = "spir64-unknown-unknown-sycldevice"
8+
9+
!0 = !{i32 1, i32 2, i32 4}
10+
11+
define weak_odr spir_kernel void @SpirKernel1(float %arg1) !reqd_work_group_size !0 {
12+
call void @foo(float %arg1)
13+
ret void
14+
}
15+
16+
declare void @foo(float)
17+
18+
; CHECK-PROP: [SYCL/program metadata]
19+
; // Base64 encoding in the prop file (including 8 bytes length):
20+
; CHECK-PROP-NEXT: SpirKernel1@reqd_work_group_size=2|gBAAAAAAAAQAAAAACAAAAQAAAAA
21+
22+
; CHECK-TABLE: [Code|Properties]
23+
; CHECK-TABLE-NEXT: {{.*}}files_0.prop
24+
; CHECK-TABLE-EMPTY:

0 commit comments

Comments
 (0)