Skip to content

Commit 652a40e

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents eb1d800 + 8d7396d commit 652a40e

File tree

70 files changed

+1459
-283
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

70 files changed

+1459
-283
lines changed

.github/CODEOWNERS

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,6 @@ sycl/test-e2e/Plugin/*level-zero* @intel/dpcpp-l0-pi-reviewers
4141
# Unified Runtime plugin
4242
sycl/plugins/unified_runtime/ @intel/dpcpp-l0-pi-reviewers
4343

44-
# ESIMD CPU emulator plug-in
45-
sycl/plugins/esimd_emulator/ @intel/dpcpp-esimd-reviewers
46-
4744
# CUDA and HIP plugins
4845
sycl/plugins/**/cuda/ @intel/llvm-reviewers-cuda
4946
sycl/plugins/**/hip/ @intel/llvm-reviewers-cuda

buildbot/configure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import argparse
22
import os
33
import platform
4+
import shlex
45
import subprocess
56
import sys
67

@@ -209,7 +210,7 @@ def do_configure(args):
209210
"-DSYCL_LIBCXX_INCLUDE_PATH={}".format(args.libcxx_include),
210211
"-DSYCL_LIBCXX_LIBRARY_PATH={}".format(args.libcxx_library)])
211212

212-
print("[Cmake Command]: {}".format(" ".join(cmake_cmd)))
213+
print("[Cmake Command]: {}".format(" ".join(map(shlex.quote, cmake_cmd))))
213214

214215
try:
215216
subprocess.check_call(cmake_cmd, cwd=abs_obj_dir)

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5562,8 +5562,9 @@ class OffloadingActionBuilder final {
55625562
DA.add(*DeviceWrappingAction, *TC, BoundArch, Action::OFK_SYCL);
55635563
continue;
55645564
}
5565-
if (IsNVPTX && Args.hasArg(options::OPT_fsycl_embed_ir)) {
5566-
// When compiling for Nvidia/CUDA devices and the user requested the
5565+
if ((IsNVPTX || IsAMDGCN) &&
5566+
Args.hasArg(options::OPT_fsycl_embed_ir)) {
5567+
// When compiling for Nvidia/AMD devices and the user requested the
55675568
// IR to be embedded in the application (via option), run the output
55685569
// of sycl-post-link (filetable referencing LLVM Bitcode + symbols)
55695570
// through the offload wrapper and link the resulting object to the

clang/test/Driver/sycl-embed-ir.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/// Tests for -fsycl-embed-ir
2+
3+
// UNSUPPORTED: system-windows
4+
5+
// RUN: %clangxx -fsycl -fsycl-targets=nvidia_gpu_sm_80 -fsycl-embed-ir -ccc-print-phases %s 2>&1 | \
6+
// RUN: FileCheck -check-prefix=CHECK-NV %s
7+
8+
// CHECK-NV: [[IR:[0-9]+]]: compiler, {4}, ir, (device-sycl, sm_80)
9+
// CHECK-NV: [[POSTLINK:[0-9]+]]: sycl-post-link, {{{.*}}}, ir, (device-sycl, sm_80)
10+
// CHECK-NV: [[WRAP:[0-9]+]]: clang-offload-wrapper, {[[POSTLINK]]}, object, (device-sycl, sm_80)
11+
// CHECK-NV: offload, "host-sycl (x86_64-unknown-linux-gnu)" {{{.*}}}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {[[WRAP]]}, "device-sycl (nvptx64-nvidia-cuda:sm_80)" {{{.*}}}, image
12+
13+
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1010 -fsycl-embed-ir -ccc-print-phases %s 2>&1 | \
14+
// RUN: FileCheck -check-prefix=CHECK-AMD %s
15+
16+
// CHECK-AMD: [[IR:[0-9]+]]: compiler, {4}, ir, (device-sycl, gfx1010)
17+
// CHECK-AMD: [[POSTLINK:[0-9]+]]: sycl-post-link, {{{.*}}}, ir, (device-sycl, gfx1010)
18+
// CHECK-AMD: [[WRAP:[0-9]+]]: clang-offload-wrapper, {[[POSTLINK]]}, object, (device-sycl, gfx1010)
19+
// CHECK-AMD: offload, "host-sycl (x86_64-unknown-linux-gnu)" {{{.*}}}, "device-sycl (amdgcn-amd-amdhsa:gfx1010)" {[[WRAP]]}, "device-sycl (amdgcn-amd-amdhsa:gfx1010)" {{{.*}}}, image

devops/scripts/install_build_tools.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ apt update && apt install -yqq \
1616
libffi-dev \
1717
libva-dev \
1818
libtool \
19-
libdw1 \
2019
wget \
2120
sudo \
2221
zstd \

sycl-fusion/common/include/Kernel.h

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,30 @@ namespace jit_compiler {
2020

2121
using BinaryAddress = const uint8_t *;
2222

23+
/// Possible barrier flags.
///
/// The enumerators form a bit mask: `Local` and `Global` occupy distinct bits
/// and `LocalAndGlobal` is their union, so membership is tested with a
/// bitwise AND (see hasLocalBarrierFlag / hasGlobalBarrierFlag below).
enum class BarrierFlags : uint32_t {
  None = 0,   ///< Do not insert barrier
  Local = 1,  ///< Ensure correct ordering of memory operations to local memory
  Global = 2, ///< Ensure correct ordering of memory operations to global memory
  LocalAndGlobal = Local | Global ///< Both of the above
};

/// Returns the flag value that requests no barrier at all.
constexpr BarrierFlags getNoBarrierFlag() { return BarrierFlags::None; }

/// Returns the flag value that has both the local and the global bit set.
constexpr BarrierFlags getLocalAndGlobalBarrierFlag() {
  return BarrierFlags::LocalAndGlobal;
}

/// Returns true iff \p Flag is exactly `BarrierFlags::None`.
constexpr bool isNoBarrierFlag(BarrierFlags Flag) {
  return Flag == BarrierFlags::None;
}

/// Returns true iff the `Local` bit is set in \p Flag.
constexpr bool hasLocalBarrierFlag(BarrierFlags Flag) {
  // Compare against zero explicitly instead of relying on an implicit
  // integer-to-bool conversion of the masked value.
  return (static_cast<uint32_t>(Flag) &
          static_cast<uint32_t>(BarrierFlags::Local)) != 0;
}

/// Returns true iff the `Global` bit is set in \p Flag.
constexpr bool hasGlobalBarrierFlag(BarrierFlags Flag) {
  // Compare against zero explicitly instead of relying on an implicit
  // integer-to-bool conversion of the masked value.
  return (static_cast<uint32_t>(Flag) &
          static_cast<uint32_t>(BarrierFlags::Global)) != 0;
}
46+
2347
///
2448
/// Enumerate possible kinds of parameters.
2549
/// 1:1 correspondence with the definition in kernel_desc.hpp in the DPC++ SYCL
@@ -35,7 +59,7 @@ enum class ParameterKind : uint32_t {
3559
};
3660

3761
/// Different binary formats supported as input to the JIT compiler.
38-
enum class BinaryFormat : uint32_t { INVALID, LLVM, SPIRV, PTX };
62+
enum class BinaryFormat : uint32_t { INVALID, LLVM, SPIRV, PTX, AMDGCN };
3963

4064
/// Information about a device intermediate representation module (e.g., SPIR-V,
4165
/// LLVM IR) from DPC++.

sycl-fusion/common/lib/KernelIO.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ template <> struct ScalarEnumerationTraits<jit_compiler::BinaryFormat> {
4848
IO.enumCase(BF, "LLVM", jit_compiler::BinaryFormat::LLVM);
4949
IO.enumCase(BF, "SPIRV", jit_compiler::BinaryFormat::SPIRV);
5050
IO.enumCase(BF, "PTX", jit_compiler::BinaryFormat::PTX);
51+
IO.enumCase(BF, "AMDGCN", jit_compiler::BinaryFormat::AMDGCN);
5152
IO.enumCase(BF, "INVALID", jit_compiler::BinaryFormat::INVALID);
5253
}
5354
};

sycl-fusion/jit-compiler/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ add_llvm_library(sycl-fusion
99
lib/fusion/ModuleHelper.cpp
1010
lib/helper/ConfigHelper.cpp
1111

12+
DEPENDS
13+
intrinsics_gen
14+
1215
LINK_COMPONENTS
1316
BitReader
1417
Core
@@ -50,6 +53,10 @@ if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
5053
target_compile_definitions(sycl-fusion PRIVATE FUSION_JIT_SUPPORT_PTX)
5154
endif()
5255

56+
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
57+
target_compile_definitions(sycl-fusion PRIVATE FUSION_JIT_SUPPORT_AMDGCN)
58+
endif()
59+
5360
if (BUILD_SHARED_LIBS)
5461
if(NOT MSVC AND NOT APPLE)
5562
# Manage symbol visibility through the linker to make sure no LLVM symbols

sycl-fusion/jit-compiler/include/JITContext.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "Hashing.h"
2121
#include "Kernel.h"
22+
#include "Options.h"
2223
#include "Parameter.h"
2324

2425
namespace llvm {
@@ -28,7 +29,7 @@ class LLVMContext;
2829
namespace jit_compiler {
2930

3031
using CacheKeyT =
31-
std::tuple<std::vector<std::string>, ParamIdentList, int,
32+
std::tuple<std::vector<std::string>, ParamIdentList, BarrierFlags,
3233
std::vector<ParameterInternalization>, std::vector<JITConstant>,
3334
// This field of the cache is optional because, if all of the
3435
// ranges are equal, we will perform no remapping, so that fused

sycl-fusion/jit-compiler/include/KernelFusion.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ class KernelFusion {
6060
const std::vector<SYCLKernelInfo> &KernelInformation,
6161
const std::vector<std::string> &KernelsToFuse,
6262
const std::string &FusedKernelName,
63-
jit_compiler::ParamIdentList &Identities, int BarriersFlags,
63+
jit_compiler::ParamIdentList &Identities,
64+
BarrierFlags BarriersFlags,
6465
const std::vector<jit_compiler::ParameterInternalization>
6566
&Internalization,
6667
const std::vector<jit_compiler::JITConstant> &JITConstants);

sycl-fusion/jit-compiler/lib/KernelFusion.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ static bool isTargetFormatSupported(BinaryFormat TargetFormat) {
5858
#else // FUSION_JIT_SUPPORT_PTX
5959
return false;
6060
#endif // FUSION_JIT_SUPPORT_PTX
61+
}
62+
case BinaryFormat::AMDGCN: {
63+
#ifdef FUSION_JIT_SUPPORT_AMDGCN
64+
return true;
65+
#else // FUSION_JIT_SUPPORT_AMDGCN
66+
return false;
67+
#endif // FUSION_JIT_SUPPORT_AMDGCN
6168
}
6269
default:
6370
return false;
@@ -69,7 +76,7 @@ FusionResult KernelFusion::fuseKernels(
6976
const std::vector<SYCLKernelInfo> &KernelInformation,
7077
const std::vector<std::string> &KernelsToFuse,
7178
const std::string &FusedKernelName, ParamIdentList &Identities,
72-
int BarriersFlags,
79+
BarrierFlags BarriersFlags,
7380
const std::vector<jit_compiler::ParameterInternalization> &Internalization,
7481
const std::vector<jit_compiler::JITConstant> &Constants) {
7582
// Initialize the configuration helper to make the options for this invocation

sycl-fusion/jit-compiler/lib/fusion/FusionPipeline.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static unsigned getFlatAddressSpace(Module &Mod) {
4040
// Ideally, we could get this information from the TargetTransformInfo, but
4141
// the SPIR-V backend does not yet seem to have an implementation for that.
4242
llvm::Triple Tri(Mod.getTargetTriple());
43-
if (Tri.isNVPTX()) {
43+
if (Tri.isNVPTX() || Tri.isAMDGCN()) {
4444
return 0;
4545
}
4646
if (Tri.isSPIRV() || Tri.isSPIR()) {
@@ -53,7 +53,7 @@ static unsigned getFlatAddressSpace(Module &Mod) {
5353

5454
std::unique_ptr<SYCLModuleInfo>
5555
FusionPipeline::runFusionPasses(Module &Mod, SYCLModuleInfo &InputInfo,
56-
int BarriersFlags) {
56+
BarrierFlags BarriersFlags) {
5757
// Perform the actual kernel fusion, i.e., generate a kernel function for the
5858
// fused kernel from the kernel functions of the input kernels. This is done
5959
// by the SYCLKernelFusion LLVM pass, which is run here through a custom LLVM

sycl-fusion/jit-compiler/lib/fusion/FusionPipeline.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class FusionPipeline {
2525
/// contain an entry for the fused kernel.
2626
static std::unique_ptr<SYCLModuleInfo>
2727
runFusionPasses(llvm::Module &Mod, SYCLModuleInfo &InputInfo,
28-
int BarriersFlags);
28+
BarrierFlags BarriersFlags);
2929
};
3030
} // namespace fusion
3131
} // namespace jit_compiler

sycl-fusion/jit-compiler/lib/translation/KernelTranslation.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,14 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
191191
KernelBin = *BinaryOrError;
192192
break;
193193
}
194+
case BinaryFormat::AMDGCN: {
195+
llvm::Expected<KernelBinary *> BinaryOrError =
196+
translateToAMDGCN(Kernel, Mod, JITCtx);
197+
if (auto Error = BinaryOrError.takeError())
198+
return Error;
199+
KernelBin = *BinaryOrError;
200+
break;
201+
}
194202
default: {
195203
return createStringError(
196204
inconvertibleErrorCode(),
@@ -287,3 +295,75 @@ KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
287295
return &JITCtx.emplaceKernelBinary(std::move(PTXASM), BinaryFormat::PTX);
288296
#endif // FUSION_JIT_SUPPORT_PTX
289297
}
298+
299+
/// Compile the LLVM IR module \p Mod to an AMDGCN object file.
///
/// The target CPU and target features are read from the "target-cpu" and
/// "target-features" function attributes of the kernel named
/// `KernelInfo.Name`, if that function exists in \p Mod and carries them;
/// otherwise the defaults below are used.
///
/// \param KernelInfo Describes the kernel; only its name is read here.
/// \param Mod        Module to run the code generation pipeline on.
/// \param JITCtx     Receives the emitted object via emplaceKernelBinary().
/// \return Pointer to the binary returned by JITCtx.emplaceKernelBinary()
///         (presumably owned by the JIT context - confirm against
///         JITContext), or an error if this build has no AMDGPU support, the
///         target cannot be looked up, the target machine cannot be created,
///         or the emission pipeline cannot be constructed.
llvm::Expected<KernelBinary *>
KernelTranslator::translateToAMDGCN(SYCLKernelInfo &KernelInfo,
                                    llvm::Module &Mod, JITContext &JITCtx) {
#ifndef FUSION_JIT_SUPPORT_AMDGCN
  (void)KernelInfo;
  (void)Mod;
  (void)JITCtx;
  return createStringError(inconvertibleErrorCode(),
                           "AMDGPU translation not supported in this build");
#else // FUSION_JIT_SUPPORT_AMDGCN

  LLVMInitializeAMDGPUTargetInfo();
  LLVMInitializeAMDGPUTarget();
  LLVMInitializeAMDGPUAsmPrinter();
  LLVMInitializeAMDGPUTargetMC();

  static const char *TARGET_CPU_ATTRIBUTE = "target-cpu";
  static const char *TARGET_FEATURE_ATTRIBUTE = "target-features";

  std::string TargetTriple{"amdgcn-amd-amdhsa"};

  std::string ErrorMessage;
  const auto *Target =
      llvm::TargetRegistry::lookupTarget(TargetTriple, ErrorMessage);

  if (!Target)
    return createStringError(
        inconvertibleErrorCode(),
        "Failed to load and translate AMDGCN LLVM IR module with error %s",
        ErrorMessage.c_str());

  // Set to the lowest tested target according to the GetStartedGuide, section
  // "Build DPC++ toolchain with support for HIP AMD"
  llvm::StringRef TargetCPU{"gfx906"};
  llvm::StringRef TargetFeatures{""};
  if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name)) {
    // Prefer the per-kernel attributes over the defaults when present.
    if (KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
      TargetCPU =
          KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
    }
    if (KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) {
      TargetFeatures = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE)
                           .getValueAsString();
    }
  }

  // FIXME: Check whether we can provide more accurate target information here
  // createTargetMachine() hands ownership of the new object to the caller, so
  // hold it in a unique_ptr; otherwise it leaks on every invocation and on
  // the early error return below.
  std::unique_ptr<llvm::TargetMachine> TM{Target->createTargetMachine(
      TargetTriple, TargetCPU, TargetFeatures, {}, llvm::Reloc::PIC_,
      std::nullopt, llvm::CodeGenOptLevel::Default)};

  // The factory may return null; fail cleanly instead of dereferencing it.
  if (!TM)
    return createStringError(
        inconvertibleErrorCode(),
        "Failed to create target machine for AMDGCN translation");

  std::string AMDObj;
  {
    // The streams live in their own scope so that they are destroyed (and
    // their contents handed on to AMDObj) before AMDObj is moved from below.
    llvm::legacy::PassManager PM;
    llvm::raw_string_ostream OBJStream{AMDObj};
    llvm::buffer_ostream BufferedOBJ{OBJStream};

    // addPassesToEmitFile() returns true on failure.
    if (TM->addPassesToEmitFile(PM, BufferedOBJ, nullptr,
                                llvm::CodeGenFileType::ObjectFile)) {
      return createStringError(
          inconvertibleErrorCode(),
          "Failed to construct pass pipeline to emit output");
    }

    PM.run(Mod);
    OBJStream.flush();
  }

  return &JITCtx.emplaceKernelBinary(std::move(AMDObj), BinaryFormat::AMDGCN);
#endif // FUSION_JIT_SUPPORT_AMDGCN
}

sycl-fusion/jit-compiler/lib/translation/KernelTranslation.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class KernelTranslator {
4343

4444
static llvm::Expected<KernelBinary *>
4545
translateToPTX(SYCLKernelInfo &Kernel, llvm::Module &Mod, JITContext &JITCtx);
46+
47+
static llvm::Expected<KernelBinary *>
48+
translateToAMDGCN(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
49+
JITContext &JITCtx);
4650
};
4751
} // namespace translation
4852
} // namespace jit_compiler

sycl-fusion/passes/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
3030
target_compile_definitions(SYCLKernelFusion PRIVATE FUSION_JIT_SUPPORT_PTX)
3131
endif()
3232

33+
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
34+
target_compile_definitions(SYCLKernelFusion PRIVATE FUSION_JIT_SUPPORT_AMDGCN)
35+
endif()
36+
3337
# Static library for linking with the jit_compiler
3438
add_llvm_library(SYCLKernelFusionPasses
3539
SYCLFusionPasses.cpp
@@ -68,3 +72,7 @@ target_link_libraries(SYCLKernelFusionPasses
6872
if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
6973
target_compile_definitions(SYCLKernelFusionPasses PRIVATE FUSION_JIT_SUPPORT_PTX)
7074
endif()
75+
76+
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
77+
target_compile_definitions(SYCLKernelFusionPasses PRIVATE FUSION_JIT_SUPPORT_AMDGCN)
78+
endif()

sycl-fusion/passes/SYCLFusionPasses.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99
#include "llvm/Passes/PassBuilder.h"
1010
#include "llvm/Passes/PassPlugin.h"
1111

12+
#include "Kernel.h"
13+
1214
#include "internalization/Internalization.h"
1315
#include "kernel-fusion/SYCLKernelFusion.h"
1416
#include "kernel-info/SYCLKernelInfo.h"
1517
#include "syclcp/SYCLCP.h"
1618

1719
using namespace llvm;
20+
using namespace jit_compiler;
1821

1922
cl::opt<bool>
2023
NoBarriers("sycl-kernel-fusion-no-barriers",
@@ -28,8 +31,9 @@ llvm::PassPluginLibraryInfo getSYCLKernelFusionPluginInfo() {
2831
[](StringRef Name, ModulePassManager &MPM,
2932
ArrayRef<PassBuilder::PipelineElement>) {
3033
if (Name == "sycl-kernel-fusion") {
31-
int BarrierFlag =
32-
(NoBarriers) ? -1 : SYCLKernelFusion::DefaultBarriersFlags;
34+
BarrierFlags BarrierFlag =
35+
(NoBarriers) ? getNoBarrierFlag()
36+
: SYCLKernelFusion::DefaultBarriersFlags;
3337
MPM.addPass(SYCLKernelFusion(BarrierFlag));
3438
return true;
3539
}

sycl-fusion/passes/kernel-fusion/SYCLKernelFusion.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -234,13 +234,12 @@ static FusionInsertPoints addGuard(IRBuilderBase &Builder,
234234
return {Entry, CallInsertion, Exit};
235235
}
236236

237-
static Expected<CallInst *>
238-
createFusionCall(IRBuilderBase &Builder, Function *F,
239-
ArrayRef<Value *> CallArgs,
240-
const jit_compiler::NDRange &SrcNDRange,
241-
const jit_compiler::NDRange &FusedNDRange, bool IsLast,
242-
int BarriersFlags, jit_compiler::Remapper &Remapper,
243-
bool ShouldRemap, TargetFusionInfo &TargetInfo) {
237+
static Expected<CallInst *> createFusionCall(
238+
IRBuilderBase &Builder, Function *F, ArrayRef<Value *> CallArgs,
239+
const jit_compiler::NDRange &SrcNDRange,
240+
const jit_compiler::NDRange &FusedNDRange, bool IsLast,
241+
jit_compiler::BarrierFlags BarriersFlags, jit_compiler::Remapper &Remapper,
242+
bool ShouldRemap, TargetFusionInfo &TargetInfo) {
244243
const auto IPs =
245244
addGuard(Builder, TargetInfo, SrcNDRange, FusedNDRange, IsLast);
246245

@@ -266,7 +265,7 @@ createFusionCall(IRBuilderBase &Builder, Function *F,
266265
Builder.SetInsertPoint(IPs.Exit);
267266

268267
// Insert barrier if needed
269-
if (!IsLast && BarriersFlags > 0) {
268+
if (!IsLast && !jit_compiler::isNoBarrierFlag(BarriersFlags)) {
270269
TargetInfo.createBarrierCall(Builder, BarriersFlags);
271270
}
272271

0 commit comments

Comments
 (0)