Skip to content

Commit d3255ee

Browse files
committed
Merge remote-tracking branch 'upstream/sycl' into udit/itt_annot
2 parents 4427bc0 + a0b5f56 commit d3255ee

File tree

244 files changed

+8790
-1713
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

244 files changed

+8790
-1713
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ sycl/doc/extensions/ @intel/dpcpp-specification-reviewers
3636

3737
# Unified Runtime
3838
sycl/cmake/modules/FetchUnifiedRuntime.cmake @intel/unified-runtime-reviewers
39+
sycl/cmake/modules/UnifiedRuntimeTag.cmake @intel/unified-runtime-reviewers
3940
sycl/include/sycl/detail/ur.hpp @intel/unified-runtime-reviewers
4041
sycl/source/detail/posix_ur.cpp @intel/unified-runtime-reviewers
4142
sycl/source/detail/ur.cpp @intel/unified-runtime-reviewers

.github/workflows/pr-code-format.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
pull_request:
88
branches:
99
- main
10+
- sycl
11+
- sycl-devops-pr/**
12+
- sycl-rel-**
1013
- 'users/**'
1114

1215
jobs:

.github/workflows/sycl-linux-build.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ jobs:
212212
if: always() && !cancelled() && contains(inputs.changes, 'libdevice')
213213
run: |
214214
cmake --build $GITHUB_WORKSPACE/build --target check-libdevice
215+
- name: Check E2E test requirements
216+
if: always() && !cancelled() && !contains(inputs.changes, 'sycl')
217+
run: |
218+
# TODO consider moving this to Dockerfile.
219+
export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
220+
LIT_OPTS="--allow-empty-runs" LIT_FILTER="e2e_test_requirements" cmake --build $GITHUB_WORKSPACE/build --target check-sycl
215221
- name: Install
216222
if: ${{ always() && !cancelled() && steps.build.conclusion == 'success' }}
217223
# TODO replace utility installation with a single CMake target

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ jobs:
185185
with:
186186
path: khronos_sycl_cts
187187
repository: 'KhronosGroup/SYCL-CTS'
188-
ref: 'SYCL-2020'
189-
default_branch: 'SYCL-2020'
188+
ref: 'main'
189+
default_branch: 'main'
190190
cache_path: "/__w/repo_cache/"
191191
- name: SYCL CTS GIT submodules init
192192
if: inputs.tests_selector == 'cts'

buildbot/configure.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def do_configure(args):
6464

6565
sycl_enable_xpti_tracing = "ON"
6666
xpti_enable_werror = "OFF"
67+
llvm_enable_zstd = "ON"
6768

6869
if sys.platform != "darwin":
6970
sycl_enabled_backends.append("level_zero")
@@ -177,6 +178,8 @@ def do_configure(args):
177178
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
178179
"-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform),
179180
"-DLLVM_BUILD_TOOLS=ON",
181+
"-DLLVM_ENABLE_ZSTD={}".format(llvm_enable_zstd),
182+
"-DLLVM_USE_STATIC_ZSTD=ON",
180183
"-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
181184
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
182185
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.

clang/include/clang/Basic/Attr.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,12 +1566,12 @@ def SYCLType: InheritableAttr {
15661566
let Subjects = SubjectList<[CXXRecord, Enum], ErrorDiag>;
15671567
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
15681568
let Args = [EnumArgument<"Type", "SYCLType", /*is_string=*/true,
1569-
["accessor", "local_accessor",
1569+
["accessor", "local_accessor", "work_group_memory",
15701570
"specialization_id", "kernel_handler", "buffer_location",
15711571
"no_alias", "accessor_property_list", "group",
15721572
"private_memory", "aspect", "annotated_ptr", "annotated_arg",
15731573
"stream", "sampler", "host_pipe", "multi_ptr"],
1574-
["accessor", "local_accessor",
1574+
["accessor", "local_accessor", "work_group_memory",
15751575
"specialization_id", "kernel_handler", "buffer_location",
15761576
"no_alias", "accessor_property_list", "group",
15771577
"private_memory", "aspect", "annotated_ptr", "annotated_arg",

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,10 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
398398
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
399399
def err_drv_sycl_thinlto_split_off: Error<
400400
"'%0' is not supported when '%1' is set with '-fsycl'">;
401+
def err_drv_sycl_offload_arch_new_driver: Error<
402+
"'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">;
403+
def err_drv_sycl_offload_arch_missing_value : Error<
404+
"must pass in an explicit cpu or gpu architecture to '--offload-arch'">;
401405
def warn_drv_sycl_offload_target_duplicate : Warning<
402406
"SYCL offloading target '%0' is similar to target '%1' already specified; "
403407
"will be ignored">, InGroup<SyclTarget>;

clang/include/clang/Driver/Action.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,14 @@ class OffloadWrapperJobAction : public JobAction {
698698
// Get the compilation step setting.
699699
bool getCompileStep() const { return CompileStep; }
700700

701+
// Set the individual wrapping setting. This is used to tell the wrapper job
702+
// action that the wrapping (and subsequent compile step) should be done
703+
// with for-each instead of using -batch.
704+
void setWrapIndividualFiles() { WrapIndividualFiles = true; }
705+
706+
// Get the individual wrapping setting.
707+
bool getWrapIndividualFiles() const { return WrapIndividualFiles; }
708+
701709
// Set the offload kind for the current wrapping job action. Default usage
702710
// is to use the kind of the current toolchain.
703711
void setOffloadKind(OffloadKind SetKind) { Kind = SetKind; }
@@ -707,6 +715,7 @@ class OffloadWrapperJobAction : public JobAction {
707715

708716
private:
709717
bool CompileStep = true;
718+
bool WrapIndividualFiles = false;
710719
OffloadKind Kind = OFK_None;
711720
};
712721

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4658,10 +4658,14 @@ def image__base : Separate<["-"], "image_base">;
46584658
def include_ : JoinedOrSeparate<["-", "--"], "include">, Group<clang_i_Group>, EnumName<"include">,
46594659
MetaVarName<"<file>">, HelpText<"Include file before parsing">,
46604660
Visibility<[ClangOption, CC1Option]>;
4661-
def include_footer : Separate<["-"], "include-footer">, Group<clang_i_Group>,
4661+
def include_internal_footer : Separate<["-"], "include-internal-footer">, Group<clang_i_Group>,
46624662
Visibility<[CC1Option]>,
46634663
HelpText<"Name of the footer integration file">, MetaVarName<"<file>">,
46644664
MarshallingInfoString<PreprocessorOpts<"IncludeFooter">>;
4665+
def include_internal_header : Separate<["-"], "include-internal-header">, Group<clang_i_Group>,
4666+
Visibility<[CC1Option]>,
4667+
HelpText<"Name of the header integration file">, MetaVarName<"<file>">,
4668+
MarshallingInfoString<PreprocessorOpts<"IncludeHeader">>;
46654669
def include_pch : Separate<["-"], "include-pch">, Group<clang_i_Group>,
46664670
Visibility<[ClangOption, CC1Option]>,
46674671
HelpText<"Include precompiled header file">, MetaVarName<"<file>">,

clang/include/clang/Lex/PreprocessorOptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class PreprocessorOptions {
6868
std::vector<std::pair<std::string, bool/*isUndef*/>> Macros;
6969
std::vector<std::string> Includes;
7070
std::string IncludeFooter;
71+
std::string IncludeHeader;
7172
std::vector<std::string> MacroIncludes;
7273

7374
/// Perform extra checks when loading PCM files for mutable file systems.

clang/include/clang/Sema/SemaSYCL.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ class SYCLIntegrationHeader {
6262
kind_pointer,
6363
kind_specialization_constants_buffer,
6464
kind_stream,
65-
kind_last = kind_stream
65+
kind_work_group_memory,
66+
kind_last = kind_work_group_memory
6667
};
6768

6869
public:

clang/lib/Driver/Driver.cpp

Lines changed: 166 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,12 +1191,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11911191
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11921192
llvm::StringMap<StringRef> FoundNormalizedTriples;
11931193
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
1194+
// StringSet to contain SYCL target triples.
1195+
llvm::StringSet<> SYCLTriples;
11941196
if (HasSYCLTargetsOption) {
11951197
// At this point, we know we have a valid combination
11961198
// of -fsycl*target options passed
11971199
Arg *SYCLTargetsValues = SYCLTargets;
11981200
if (SYCLTargetsValues) {
1199-
llvm::StringSet<> SYCLTriples;
12001201
if (SYCLTargetsValues->getNumValues()) {
12011202

12021203
// Multiple targets are currently not supported when using
@@ -1296,6 +1297,109 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12961297
Diag(clang::diag::warn_drv_empty_joined_argument)
12971298
<< SYCLTargetsValues->getAsString(C.getInputArgs());
12981299
}
1300+
}
1301+
// If the user specified --offload-arch, deduce the offloading
1302+
// target triple(s) from the set of architecture(s).
1303+
// Create a toolchain for each valid triple.
1304+
// We do not support SYCL offloading if any of the inputs is a
1305+
// .cu (for CUDA type) or .hip (for HIP type) file.
1306+
else if (HasValidSYCLRuntime &&
1307+
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
1308+
!IsCuda) {
1309+
// SYCL offloading to AOT Targets with '--offload-arch'
1310+
// is currently enabled only with '--offload-new-driver' option.
1311+
// Emit a diagnostic if '--offload-arch' is invoked without
1312+
// '--offload-new-driver' option.
1313+
if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
1314+
options::OPT_no_offload_new_driver, false)) {
1315+
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
1316+
return;
1317+
}
1318+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1319+
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1320+
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1321+
HostTC->getTriple());
1322+
1323+
// Attempt to deduce the offloading triple from the set of architectures.
1324+
// We need to temporarily create these toolchains so that we can access
1325+
// tools for inferring architectures.
1326+
llvm::DenseSet<StringRef> Archs;
1327+
if (NVPTXTriple) {
1328+
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1329+
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
1330+
for (StringRef Arch :
1331+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1332+
Archs.insert(Arch);
1333+
}
1334+
if (AMDTriple) {
1335+
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1336+
*this, *AMDTriple, *HostTC, C.getInputArgs());
1337+
for (StringRef Arch :
1338+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
1339+
Archs.insert(Arch);
1340+
}
1341+
if (!AMDTriple && !NVPTXTriple) {
1342+
for (StringRef Arch :
1343+
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
1344+
Archs.insert(Arch);
1345+
}
1346+
for (StringRef Arch : Archs) {
1347+
if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch(
1348+
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1349+
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1350+
} else if (AMDTriple &&
1351+
IsSYCLSupportedAMDGPUArch(StringToOffloadArch(
1352+
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1353+
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1354+
} else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) {
1355+
DerivedArchs[MakeSYCLDeviceTriple("spir64_x86_64").getTriple()].insert(
1356+
Arch);
1357+
} else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) {
1358+
StringRef IntelGPUArch;
1359+
// For Intel Graphics AOT target, valid values for '--offload-arch'
1360+
// are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
1361+
// compiler) via the '-device' option. The mapIntelGPUArchName
1362+
// function maps the accepted values for '--offload-arch' to enable SYCL
1363+
// offloading to Intel GPUs and the corresponding '-device' value passed
1364+
// to OCLOC.
1365+
IntelGPUArch = mapIntelGPUArchName(Arch).data();
1366+
DerivedArchs[MakeSYCLDeviceTriple("spir64_gen").getTriple()].insert(
1367+
IntelGPUArch);
1368+
} else {
1369+
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
1370+
return;
1371+
}
1372+
}
1373+
// Emit an error if architecture value is not provided
1374+
// to --offload-arch.
1375+
if (Archs.empty()) {
1376+
Diag(clang::diag::err_drv_sycl_offload_arch_missing_value);
1377+
return;
1378+
}
1379+
1380+
for (const auto &TripleAndArchs : DerivedArchs)
1381+
SYCLTriples.insert(TripleAndArchs.first());
1382+
1383+
for (const auto &Val : SYCLTriples) {
1384+
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
1385+
std::string NormalizedName = SYCLTargetTriple.normalize();
1386+
1387+
// Make sure we don't have a duplicate triple.
1388+
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
1389+
if (Duplicate != FoundNormalizedTriples.end()) {
1390+
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
1391+
<< Val.getKey() << Duplicate->second;
1392+
continue;
1393+
}
1394+
1395+
// Store the current triple so that we can check for duplicates in the
1396+
// following iterations.
1397+
FoundNormalizedTriples[NormalizedName] = Val.getKey();
1398+
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
1399+
}
1400+
1401+
addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
1402+
12991403
} else {
13001404
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
13011405
// For -fsycl-device-only, we also setup the implied triple as needed.
@@ -5432,9 +5536,58 @@ class OffloadingActionBuilder final {
54325536
BundlingActions, types::TY_Object);
54335537
if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
54345538
OWA->setOffloadKind(Action::OFK_Host);
5539+
// The Backend compilation step performed here is being done for
5540+
// creating FPGA archives. The possible split binaries after
5541+
// sycl-post-link need to be individually wrapped as opposed to
5542+
// being passed into the clang-offload-wrapper via a table and
5543+
// using the -batch option - effectively creating a single
5544+
// binary. The resulting archive created from -fsycl-link should
5545+
// not contain the singular binary, but should be individual
5546+
// binaries to be consumed later by either the -fsycl-link=image
5547+
// device compilation step or being linked into the final exe.
5548+
//
5549+
// Typical compile flow:
5550+
// .bc
5551+
// |
5552+
// sycl-post-link -split=kernel
5553+
// |
5554+
// +--------+--------+
5555+
// | | |
5556+
// split1 split2 split3
5557+
// | | |
5558+
// llvm-spirv llvm-spirv llvm-spirv
5559+
// | | |
5560+
// ocloc ocloc ocloc
5561+
// | | |
5562+
// +--------+--------+
5563+
// |
5564+
// clang-offload-wrapper -batch
5565+
// |
5566+
// .o
5567+
//
5568+
// Individual wrap compile flow:
5569+
// .bc
5570+
// |
5571+
// sycl-post-link -split=kernel
5572+
// |
5573+
// +--------+--------+
5574+
// | | |
5575+
// split1 split2 split3
5576+
// | | |
5577+
// llvm-spirv llvm-spirv llvm-spirv
5578+
// | | |
5579+
// ocloc ocloc ocloc
5580+
// | | |
5581+
// wrap wrap wrap
5582+
// | | |
5583+
// .o .o .o
5584+
//
54355585
Action *CompiledDeviceAction =
5436-
C.MakeAction<OffloadWrapperJobAction>(WrapperItems,
5437-
types::TY_Object);
5586+
C.MakeAction<OffloadWrapperJobAction>(FPGAAOTAction,
5587+
types::TY_Tempfilelist);
5588+
if (auto *OWA =
5589+
dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5590+
OWA->setWrapIndividualFiles();
54385591
addDeps(CompiledDeviceAction, TC, BoundArch);
54395592
}
54405593
addDeps(DeviceAction, TC, BoundArch);
@@ -5708,6 +5861,9 @@ class OffloadingActionBuilder final {
57085861
};
57095862

57105863
Action *ExtractIRFilesAction = createExtractIRFilesAction();
5864+
// Device binaries that are individually wrapped when creating an
5865+
// FPGA Archive.
5866+
ActionList FPGAArchiveWrapperInputs;
57115867

57125868
if (IsNVPTX || IsAMDGCN) {
57135869
JobAction *FinAction =
@@ -5793,6 +5949,7 @@ class OffloadingActionBuilder final {
57935949
FileTableTformJobAction::COL_CODE,
57945950
FileTableTformJobAction::COL_CODE);
57955951
WrapperInputs.push_back(ReplaceFilesAction);
5952+
FPGAArchiveWrapperInputs.push_back(BuildCodeAction);
57965953
}
57975954
if (SkipWrapper) {
57985955
// Wrapper step not requested.
@@ -5827,8 +5984,11 @@ class OffloadingActionBuilder final {
58275984
if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
58285985
OWA->setOffloadKind(Action::OFK_Host);
58295986
Action *CompiledDeviceAction =
5830-
C.MakeAction<OffloadWrapperJobAction>(WrapperInputs,
5831-
types::TY_Object);
5987+
C.MakeAction<OffloadWrapperJobAction>(
5988+
FPGAArchiveWrapperInputs, types::TY_Tempfilelist);
5989+
if (auto *OWA =
5990+
dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5991+
OWA->setWrapIndividualFiles();
58325992
addDeps(CompiledDeviceAction, TC, nullptr);
58335993
}
58345994
addDeps(DeviceAction, TC, nullptr);
@@ -6357,7 +6517,7 @@ class OffloadingActionBuilder final {
63576517
if (GpuInitHasErrors)
63586518
return true;
63596519

6360-
int GenIndex = 0;
6520+
size_t GenIndex = 0;
63616521
// Fill SYCLTargetInfoList
63626522
for (auto &TT : SYCLTripleList) {
63636523
auto TCIt = llvm::find_if(

0 commit comments

Comments
 (0)