Skip to content

Commit 1fd30a2

Browse files
committed
Merge from 'sycl' to 'sycl-web' (12 commits)
2 parents 5c5709e + d058186 commit 1fd30a2

File tree

135 files changed

+8379
-8055
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+8379
-8055
lines changed

.github/workflows/sycl_precommit.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ jobs:
8282
secrets: inherit
8383
with:
8484
build_ref: ${{ github.event.pull_request.head.sha }}
85+
merge_ref: ${{ github.event.pull_request.base.sha }}
8586
build_cache_root: "/__w/"
8687
build_cache_size: "8G"
8788
build_artifact_suffix: "default"

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,10 +2349,6 @@ void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn,
23492349
Fn->setMetadata("kernel_arg_exclusive_ptr",
23502350
llvm::MDNode::get(VMContext, argSYCLAccessorPtrs));
23512351
}
2352-
if (LangOpts.SYCLIsNativeCPU) {
2353-
Fn->setMetadata("kernel_arg_type",
2354-
llvm::MDNode::get(VMContext, argTypeNames));
2355-
}
23562352
} else {
23572353
if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
23582354
Fn->setMetadata("kernel_arg_addr_space",

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9812,7 +9812,8 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
98129812
",+SPV_INTEL_global_variable_decorations"
98139813
",+SPV_INTEL_fpga_buffer_location"
98149814
",+SPV_INTEL_fpga_argument_interfaces"
9815-
",+SPV_INTEL_fpga_invocation_pipelining_attributes";
9815+
",+SPV_INTEL_fpga_invocation_pipelining_attributes"
9816+
",+SPV_INTEL_fpga_latency_control";
98169817
ExtArg = ExtArg + DefaultExtArg + INTELExtArg;
98179818
if (C.getDriver().IsFPGAHWMode())
98189819
// Enable several extensions on FPGA H/W exclusively

clang/test/CodeGenSYCL/native_cpu_basic.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
// This test checks for some basic Front End features for Native CPU:
22
// * Kernel name mangling
3-
// * kernel_arg_type metadata node
43
// * is-native-cpu module flag
54
// RUN: %clang_cc1 -fsycl-is-device -S -emit-llvm -internal-isystem %S/Inputs -fsycl-is-native-cpu -o %t.ll %s
65
// RUN: FileCheck -input-file=%t.ll %s
@@ -50,11 +49,9 @@ void gen() {
5049
test<float>(q);
5150
}
5251

53-
// Check name mangling and kernel_arg_type metadata
54-
// CHECK-DAG: @_ZTS6init_aIiE_NativeCPUKernel({{.*}}){{.*}}!kernel_arg_type ![[TYPE1:[0-9]*]]
55-
// CHECK-DAG: @_ZTS6init_aIfE_NativeCPUKernel({{.*}}){{.*}}!kernel_arg_type ![[TYPE3:[0-9]*]]
56-
// CHECK-DAG: ![[TYPE1]] = !{!"int*", !"sycl::range<1>", !"sycl::range<1>", !"sycl::id<1>", !"int"}
57-
// CHECK-DAG: ![[TYPE3]] = !{!"float*", !"sycl::range<1>", !"sycl::range<1>", !"sycl::id<1>", !"float"}
52+
// Check name mangling
53+
// CHECK-DAG: @_ZTS6init_aIiE_NativeCPUKernel_NativeCPUKernel({{.*}})
54+
// CHECK-DAG: @_ZTS6init_aIfE_NativeCPUKernel_NativeCPUKernel({{.*}})
5855

5956
// Check Native CPU module flag
6057
// CHECK-DAG: !{{[0-9]*}} = !{i32 1, !"is-native-cpu", i32 1}

clang/test/Driver/sycl-spirv-ext.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
// CHECK-DEFAULT-SAME:,+SPV_INTEL_fpga_buffer_location
4848
// CHECK-DEFAULT-SAME:,+SPV_INTEL_fpga_argument_interfaces
4949
// CHECK-DEFAULT-SAME:,+SPV_INTEL_fpga_invocation_pipelining_attributes
50+
// CHECK-DEFAULT-SAME:,+SPV_INTEL_fpga_latency_control
5051
// CHECK-DEFAULT-SAME:,+SPV_INTEL_token_type
5152
// CHECK-DEFAULT-SAME:,+SPV_INTEL_bfloat16_conversion
5253
// CHECK-DEFAULT-SAME:,+SPV_INTEL_joint_matrix
@@ -75,6 +76,7 @@
7576
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_arithmetic_fence
7677
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_buffer_location
7778
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_argument_interfaces
79+
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_latency_control
7880
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_usm_storage_classes
7981
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_runtime_aligned
8082
// CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_cluster_attributes,+SPV_INTEL_loop_fuse

devops/actions/cached_checkout/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ runs:
6161
git fetch origin $DEFAULT_BRANCH
6262
echo "FETCHED:"
6363
git log -1 origin/$DEFAULT_BRANCH
64-
echo "Merging it into the current workspace"
64+
echo "Merging ${{ inputs.merge_ref }} into the current workspace"
6565
# Set fake identity to fulfil git requirements
6666
git config --local user.email "[email protected]"
6767
git config --local user.name "GitHub Actions"

devops/containers/ubuntu2204_base.Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ RUN /install.sh
1313
# discover user home directory and fail a few LIT tests. Fixes UID and GID to
1414
# 1001, that is used as default by GitHub Actions.
1515
RUN groupadd -g 1001 sycl && useradd sycl -u 1001 -g 1001 -m -s /bin/bash
16-
# Add sycl user to video group so that it can access GPU
16+
# Add sycl user to video/irc groups so that it can access GPU
1717
RUN usermod -aG video sycl
18+
RUN usermod -aG irc sycl
1819
# Allow sycl user to run as sudo
1920
RUN echo "sycl ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
2021

devops/containers/ubuntu2204_build.Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@ RUN apt install -yqq libnuma-dev wget gnupg2 && \
2727
# discover user home directory and fail a few LIT tests. Fixes UID and GID to
2828
# 1001, that is used as default by GitHub Actions.
2929
RUN groupadd -g 1001 sycl && useradd sycl -u 1001 -g 1001 -m -s /bin/bash
30-
# Add sycl user to video group so that it can access GPU
30+
# Add sycl user to video/irc groups so that it can access GPU
3131
RUN usermod -aG video sycl
32+
RUN usermod -aG irc sycl
3233

3334
COPY actions/cached_checkout /actions/cached_checkout
3435
COPY actions/cleanup /actions/cleanup

llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ static const char *LegalSYCLFunctions[] = {
5050
"^sycl::_V1::multi_ptr<.+>::.+",
5151
"^sycl::_V1::nd_item<.+>::.+",
5252
"^sycl::_V1::group<.+>::.+",
53-
"^sycl::_V1::sub_group<.+>::.+",
53+
"^sycl::_V1::sub_group::.+",
5454
"^sycl::_V1::range<.+>::.+",
5555
"^sycl::_V1::kernel_handler::.+",
5656
"^sycl::_V1::cos",

llvm/lib/SYCLLowerIR/EmitSYCLNativeCPUHeader.cpp

Lines changed: 5 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -33,133 +33,12 @@
3333
using namespace llvm;
3434

3535
namespace {
36-
SmallVector<bool> getArgMask(const Function *F) {
37-
SmallVector<bool> Res;
38-
auto *UsedNode = F->getMetadata("sycl_kernel_omit_args");
39-
if (!UsedNode) {
40-
// the metadata node is not available if -fenable-sycl-dae
41-
// was not set; set everything to true in the mask.
42-
for (unsigned I = 0; I < F->getFunctionType()->getNumParams(); I++) {
43-
Res.push_back(true);
44-
}
45-
return Res;
46-
}
47-
auto NumOperands = UsedNode->getNumOperands();
48-
for (unsigned I = 0; I < NumOperands; I++) {
49-
auto &Op = UsedNode->getOperand(I);
50-
if (auto *CAM = dyn_cast<ConstantAsMetadata>(Op.get())) {
51-
if (auto *Const = dyn_cast<ConstantInt>(CAM->getValue())) {
52-
auto Val = Const->getValue();
53-
Res.push_back(!Val.getBoolValue());
54-
} else {
55-
report_fatal_error("Unable to retrieve constant int from "
56-
"sycl_kernel_omit_args metadata node");
57-
}
58-
} else {
59-
report_fatal_error(
60-
"Error while processing sycl_kernel_omit_args metadata node");
61-
}
62-
}
63-
return Res;
64-
}
6536

66-
SmallVector<StringRef> getArgTypeNames(const Function *F) {
67-
SmallVector<StringRef> Res;
68-
auto *TNNode = F->getMetadata("kernel_arg_type");
69-
assert(TNNode &&
70-
"kernel_arg_type metadata node is required for sycl native CPU");
71-
auto NumOperands = TNNode->getNumOperands();
72-
for (unsigned I = 0; I < NumOperands; I++) {
73-
auto &Op = TNNode->getOperand(I);
74-
auto *MDS = dyn_cast<MDString>(Op.get());
75-
if (!MDS)
76-
report_fatal_error("error while processing kernel_arg_types metadata");
77-
Res.push_back(MDS->getString());
78-
}
79-
return Res;
80-
}
81-
82-
void emitKernelDecl(const Function *F, const SmallVector<bool> &ArgMask,
83-
const SmallVector<StringRef> &ArgTypeNames,
84-
raw_ostream &O) {
85-
auto EmitArgDecl = [&](const Argument *Arg, unsigned Index) {
86-
Type *ArgTy = Arg->getType();
87-
if (isa<PointerType>(ArgTy))
88-
return "void *";
89-
return ArgTypeNames[Index].data();
90-
};
91-
92-
auto NumParams = F->getFunctionType()->getNumParams();
93-
O << "extern \"C\" void " << F->getName() << "(";
94-
95-
unsigned I = 0, UsedI = 0;
96-
for (; I + 1 < ArgMask.size() && UsedI + 1 < NumParams; I++) {
97-
if (!ArgMask[I])
98-
continue;
99-
O << EmitArgDecl(F->getArg(UsedI), I) << ", ";
100-
UsedI++;
101-
}
102-
103-
// parameters may have been removed.
104-
bool NoUsedArgs = true;
105-
for (auto &Entry : ArgMask) {
106-
NoUsedArgs &= !Entry;
107-
}
108-
if (NoUsedArgs) {
109-
O << ");\n";
110-
return;
111-
}
112-
// find the index of the last used arg
113-
while (!ArgMask[I] && I + 1 < ArgMask.size())
114-
I++;
115-
O << EmitArgDecl(F->getArg(UsedI), I) << ", __nativecpu_state *);\n";
116-
}
117-
118-
void emitSubKernelHandler(const Function *F, const SmallVector<bool> &ArgMask,
119-
const SmallVector<StringRef> &ArgTypeNames,
120-
raw_ostream &O) {
121-
SmallVector<unsigned> UsedArgIdx;
122-
auto EmitParamCast = [&](Argument *Arg, unsigned Index) {
123-
std::string Res;
124-
llvm::raw_string_ostream OS(Res);
125-
UsedArgIdx.push_back(Index);
126-
if (isa<PointerType>(Arg->getType())) {
127-
OS << " void* arg" << Index << " = ";
128-
OS << "MArgs[" << Index << "].getPtr();\n";
129-
return OS.str();
130-
}
131-
auto TN = ArgTypeNames[Index].str();
132-
OS << " " << TN << " arg" << Index << " = ";
133-
OS << "*(" << TN << "*)"
134-
<< "MArgs[" << Index << "].getPtr();\n";
135-
return OS.str();
136-
};
137-
138-
O << "\ninline static void " << F->getName() << "subhandler(";
37+
void emitSubKernelHandler(const Function *F, raw_ostream &O) {
38+
O << "\nextern \"C\" void " << F->getName() << "subhandler(";
13939
O << "const sycl::detail::NativeCPUArgDesc *MArgs, "
140-
"__nativecpu_state *state) {\n";
141-
// Retrieve only the args that are used
142-
for (unsigned I = 0, UsedI = 0;
143-
I < ArgMask.size() && UsedI < F->getFunctionType()->getNumParams();
144-
I++) {
145-
if (ArgMask[I]) {
146-
O << EmitParamCast(F->getArg(UsedI), I);
147-
UsedI++;
148-
}
149-
}
150-
// Emit the actual kernel call
151-
O << " " << F->getName() << "(";
152-
if (UsedArgIdx.size() == 0) {
153-
O << ");\n";
154-
} else {
155-
for (unsigned I = 0; I < UsedArgIdx.size() - 1; I++) {
156-
O << "arg" << UsedArgIdx[I] << ", ";
157-
}
158-
if (UsedArgIdx.size() >= 1)
159-
O << "arg" << UsedArgIdx.back();
160-
O << ", state);\n";
161-
}
162-
O << "};\n\n";
40+
"__nativecpu_state *state);\n";
41+
return;
16342
}
16443

16544
// Todo: maybe we could use clang-offload-wrapper for this,
@@ -254,10 +133,7 @@ PreservedAnalyses EmitSYCLNativeCPUHeaderPass::run(Module &M,
254133
O << "extern \"C\" void __sycl_register_lib(pi_device_binaries desc);\n";
255134

256135
for (auto *F : Kernels) {
257-
auto ArgMask = getArgMask(F);
258-
auto ArgTypeNames = getArgTypeNames(F);
259-
emitKernelDecl(F, ArgMask, ArgTypeNames, O);
260-
emitSubKernelHandler(F, ArgMask, ArgTypeNames, O);
136+
emitSubKernelHandler(F, O);
261137
emitSYCLRegisterLib(F, O);
262138
}
263139

llvm/lib/SYCLLowerIR/PrepareSYCLNativeCPU.cpp

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/IRBuilder.h"
2424
#include "llvm/IR/Instruction.h"
2525
#include "llvm/IR/Instructions.h"
26+
#include "llvm/IR/LLVMContext.h"
2627
#include "llvm/IR/Operator.h"
2728
#include "llvm/IR/Value.h"
2829
#include "llvm/InitializePasses.h"
@@ -63,7 +64,88 @@ void fixCallingConv(Function *F) {
6364
F->setLinkage(GlobalValue::LinkageTypes::WeakAnyLinkage);
6465
}
6566

66-
// Clone the function and returns a new function with a new argument on type T
67+
// returns the indexes of the used arguments
68+
SmallVector<unsigned> getUsedIndexes(const Function *F) {
69+
SmallVector<unsigned> res;
70+
auto UsedNode = F->getMetadata("sycl_kernel_omit_args");
71+
if (!UsedNode) {
72+
// the metadata node is not available if -fenable-sycl-dae
73+
// was not set; set everything to true
74+
// Exclude one arg because we already added the state ptr
75+
for (unsigned I = 0; I + 1 < F->getFunctionType()->getNumParams(); I++) {
76+
res.push_back(I);
77+
}
78+
return res;
79+
}
80+
auto NumOperands = UsedNode->getNumOperands();
81+
for (unsigned I = 0; I < NumOperands; I++) {
82+
auto &Op = UsedNode->getOperand(I);
83+
if (auto CAM = dyn_cast<ConstantAsMetadata>(Op.get())) {
84+
if (auto Const = dyn_cast<ConstantInt>(CAM->getValue())) {
85+
auto Val = Const->getValue();
86+
if (!Val.getBoolValue()) {
87+
res.push_back(I);
88+
}
89+
} else {
90+
report_fatal_error("Unable to retrieve constant int from "
91+
"sycl_kernel_omit_args metadata node");
92+
}
93+
} else {
94+
report_fatal_error(
95+
"Error while processing sycl_kernel_omit_args metadata node");
96+
}
97+
}
98+
return res;
99+
}
100+
101+
void emitSubkernelForKernel(Function *F, Type *NativeCPUArgDescType,
102+
Type *StatePtrType) {
103+
LLVMContext &Ctx = F->getContext();
104+
Type *NativeCPUArgDescPtrType = PointerType::getUnqual(NativeCPUArgDescType);
105+
106+
// Create function signature
107+
const std::string SubHandlerName = F->getName().str() + "subhandler";
108+
FunctionType *FTy = FunctionType::get(
109+
Type::getVoidTy(Ctx), {NativeCPUArgDescPtrType, StatePtrType}, false);
110+
auto SubhFCallee = F->getParent()->getOrInsertFunction(SubHandlerName, FTy);
111+
Function *SubhF = cast<Function>(SubhFCallee.getCallee());
112+
113+
// Emit function body, unpack kernel args
114+
auto UsedIndexes = getUsedIndexes(F);
115+
auto *KernelTy = F->getFunctionType();
116+
// assert(UsedIndexes.size() + 1 == KernelTy->getNumParams() && "mismatch
117+
// between number of params and used args");
118+
IRBuilder<> Builder(Ctx);
119+
BasicBlock *Block = BasicBlock::Create(Ctx, "entry", SubhF);
120+
Builder.SetInsertPoint(Block);
121+
unsigned NumArgs = UsedIndexes.size();
122+
auto *BaseNativeCPUArg = SubhF->getArg(0);
123+
SmallVector<Value *, 5> KernelArgs;
124+
for (unsigned I = 0; I < NumArgs; I++) {
125+
auto *Arg = F->getArg(I);
126+
auto UsedI = UsedIndexes[I];
127+
// Load the correct NativeCPUDesc and load the pointer from it
128+
auto *Addr = Builder.CreateGEP(NativeCPUArgDescType, BaseNativeCPUArg,
129+
{Builder.getInt64(UsedI)});
130+
auto *Load = Builder.CreateLoad(PointerType::getUnqual(Ctx), Addr);
131+
if (Arg->getType()->isPointerTy()) {
132+
// If the arg is a pointer, just use it
133+
KernelArgs.push_back(Load);
134+
} else {
135+
// Otherwise, load the scalar value and use that
136+
auto *Scalar = Builder.CreateLoad(Arg->getType(), Load);
137+
KernelArgs.push_back(Scalar);
138+
}
139+
}
140+
141+
// Call the kernel
142+
// Add the nativecpu state as arg
143+
KernelArgs.push_back(SubhF->getArg(1));
144+
Builder.CreateCall(KernelTy, F, KernelArgs);
145+
Builder.CreateRetVoid();
146+
}
147+
148+
// Clones the function and returns a new function with a new argument on type T
67149
// added as last argument
68150
Function *cloneFunctionAndAddParam(Function *OldF, Type *T) {
69151
auto *OldT = OldF->getFunctionType();
@@ -167,6 +249,14 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
167249
ModuleChanged |= true;
168250
}
169251

252+
StructType *NativeCPUArgDescType =
253+
StructType::create({PointerType::getUnqual(M.getContext())});
254+
for (auto &NewK : NewKernels) {
255+
emitSubkernelForKernel(NewK, NativeCPUArgDescType, StatePtrType);
256+
std::string NewName = NewK->getName().str() + "_NativeCPUKernel";
257+
NewK->setName(NewName);
258+
}
259+
170260
// Then we iterate over all the supported builtins, find their uses and
171261
// replace them with calls to our Native CPU functions.
172262
for (auto &Entry : BuiltinNamesMap) {

0 commit comments

Comments
 (0)