Skip to content

Commit 8d77da7

Browse files
authored
[SYCL] Split device images based on accuracy level provided in option (#10140)
This PR reuses optional kernel features mechanism to split device image based on accuracy level provided using -ffp-accuracy compilation option (introduced in PR#8280): 1. When frontend emits fp intrinsic call and attaches the maximum error attribute we also attach "sycl_used_aspects" metadata to the call instruction with a value which corresponds to high, medium, low, sycl or cuda. Mapping for those values is needed to be visible for SYCL device compiler only and we intentionally don't put those values to aspects enum because we don't need these aspects to be visible to the user in this case (because of the reasons described in Details below). 2. Make SYCLPropagateAspectsUsage to propagate sycl_used_aspects metadata from instructions to kernel. 3. Don't add internal aspects into the device requirements, because we don't need processing of these internal aspects (with negative values) in the SYCL RT. Splitting functionality based on sycl_used_aspects metadata is available for free. Details: Currently accruracy level can be controlled using the following options. For entire translation unit: -ffp-accuracy=high -ffp-accuracy=medium -ffp-accuracy=low -ffp-accuracy=sycl -ffp-accuracy=cuda For particular funcions in the translation unit: -ffp-accuracy=low:sin,cos Whenever frontend sees a math function in a kernel or a device function it emits fp intrinsic call with attached callsite attribute indicating value of the maximum error. llvm-spirv is going to translate this builtins to regular __ocl intrinsics and translate callsite attribute to decorator (which is a new spirv extension). If that extension is not supported by the backend, it is going to emit an error. Error is emitted also in the case if backend supports the extension but can't compile the kernel because it doesn't have corresponding implemenation of math function complying with required maximum error. Aspects corrsponding to different levels of accuracy are not suitable in this case because aforementioned options are sycl program compilation options, i.e. it doesn't make sense to provide an opportunity to the user to write something like this: if (dev.has(aspect::ext_oneapi_fp_intrinsic_accuracy_high)) { /* submit kernel using high accuracy intrinsics */ } But on our side we still would like to put kernels and device functions to different images based on required accuracy level. It is necessary because some backends may support, for example, low and medium accuracy but don't support high accuracy.
1 parent 833a9fe commit 8d77da7

File tree

11 files changed

+520
-26
lines changed

11 files changed

+520
-26
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -540,12 +540,18 @@ static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
540540
// TODO: Replace AttrList with a single attribute. The call can only have a
541541
// single FPAccuracy attribute.
542542
llvm::AttributeList AttrList;
543+
// "sycl_used_aspects" metadata associated with the call.
544+
llvm::Metadata *AspectMD = nullptr;
543545
// sincos() doesn't return a value, but it still has a type associated with
544546
// it that corresponds to the operand type.
545547
CGF.CGM.getFPAccuracyFuncAttributes(
546-
Name, AttrList, ID,
548+
Name, AttrList, AspectMD, ID,
547549
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
548550
CI->setAttributes(AttrList);
551+
552+
if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
553+
CI->setMetadata("sycl_used_aspects",
554+
llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
549555
return CI;
550556
}
551557

@@ -22418,21 +22424,22 @@ llvm::CallInst *CodeGenFunction::EmitFPBuiltinIndirectCall(
2241822424
// Even if the current function doesn't have a clang builtin, create
2241922425
// an 'fpbuiltin-max-error' attribute for it; unless it's marked with
2242022426
// an NoBuiltin attribute.
22421-
if (!FD->hasAttr<NoBuiltinAttr>()) {
22422-
Name = FD->getName();
22423-
FPAccuracyIntrinsicID =
22424-
llvm::StringSwitch<unsigned>(Name)
22425-
.Case("fadd", llvm::Intrinsic::fpbuiltin_fadd)
22426-
.Case("fdiv", llvm::Intrinsic::fpbuiltin_fdiv)
22427-
.Case("fmul", llvm::Intrinsic::fpbuiltin_fmul)
22428-
.Case("fsub", llvm::Intrinsic::fpbuiltin_fsub)
22429-
.Case("frem", llvm::Intrinsic::fpbuiltin_frem)
22430-
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
22431-
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
22432-
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt);
22433-
} else {
22427+
if (FD->hasAttr<NoBuiltinAttr>() ||
22428+
!FD->getNameInfo().getName().isIdentifier())
2243422429
return nullptr;
22435-
}
22430+
22431+
Name = FD->getName();
22432+
FPAccuracyIntrinsicID =
22433+
llvm::StringSwitch<unsigned>(Name)
22434+
.Case("fadd", llvm::Intrinsic::fpbuiltin_fadd)
22435+
.Case("fdiv", llvm::Intrinsic::fpbuiltin_fdiv)
22436+
.Case("fmul", llvm::Intrinsic::fpbuiltin_fmul)
22437+
.Case("fsub", llvm::Intrinsic::fpbuiltin_fsub)
22438+
.Case("frem", llvm::Intrinsic::fpbuiltin_frem)
22439+
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
22440+
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
22441+
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
22442+
.Default(0);
2243622443
} else {
2243722444
// The function has a clang builtin. Create an attribute for it
2243822445
// only if it has an fpbuiltin intrinsic.
@@ -22512,6 +22519,9 @@ llvm::CallInst *CodeGenFunction::EmitFPBuiltinIndirectCall(
2251222519
break;
2251322520
}
2251422521
}
22522+
if (!FPAccuracyIntrinsicID)
22523+
return nullptr;
22524+
2251522525
Func = CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
2251622526
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
2251722527
FPAccuracyIntrinsicID);

clang/lib/CodeGen/CGCall.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "CGCXXABI.h"
1818
#include "CGCleanup.h"
1919
#include "CGRecordLayout.h"
20+
#include "CGSYCLRuntime.h"
2021
#include "CodeGenFunction.h"
2122
#include "CodeGenModule.h"
2223
#include "TargetInfo.h"
@@ -1860,9 +1861,21 @@ static llvm::fp::FPAccuracy convertFPAccuracy(StringRef FPAccuracyStr) {
18601861
.Case("cuda", llvm::fp::FPAccuracy::CUDA);
18611862
}
18621863

1864+
static int32_t convertFPAccuracyToAspect(StringRef FPAccuracyStr) {
1865+
assert(FPAccuracyStr.equals("high") || FPAccuracyStr.equals("medium") ||
1866+
FPAccuracyStr.equals("low") || FPAccuracyStr.equals("sycl") ||
1867+
FPAccuracyStr.equals("cuda"));
1868+
return llvm::StringSwitch<int32_t>(FPAccuracyStr)
1869+
.Case("high", SYCLInternalAspect::fp_intrinsic_accuracy_high)
1870+
.Case("medium", SYCLInternalAspect::fp_intrinsic_accuracy_medium)
1871+
.Case("low", SYCLInternalAspect::fp_intrinsic_accuracy_low)
1872+
.Case("sycl", SYCLInternalAspect::fp_intrinsic_accuracy_sycl)
1873+
.Case("cuda", SYCLInternalAspect::fp_intrinsic_accuracy_cuda);
1874+
}
1875+
18631876
void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
1864-
StringRef Name, llvm::AttrBuilder &FuncAttrs, unsigned ID,
1865-
const llvm::Type *FuncType) {
1877+
StringRef Name, llvm::AttrBuilder &FuncAttrs, llvm::Metadata *&MD,
1878+
unsigned ID, const llvm::Type *FuncType) {
18661879
// Priority is given to to the accuracy specific to the function.
18671880
// So, if the command line is something like this:
18681881
// 'clang -fp-accuracy = high -fp-accuracy = low:[sin]'.
@@ -1878,6 +1891,8 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
18781891
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
18791892
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
18801893
FuncAttrs.addAttribute("fpbuiltin-max-error=", FPAccuracyVal);
1894+
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
1895+
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
18811896
}
18821897
}
18831898
if (FuncAttrs.attrs().size() == 0)
@@ -1886,6 +1901,8 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
18861901
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
18871902
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
18881903
FuncAttrs.addAttribute("fpbuiltin-max-error=", FPAccuracyVal);
1904+
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
1905+
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
18891906
}
18901907
}
18911908

clang/lib/CodeGen/CGSYCLRuntime.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,19 @@ namespace CodeGen {
2323

2424
class CodeGenModule;
2525

26+
// These aspects are internal and used for device image splitting purposes only.
27+
// They are not exposed to the SYCL users through "aspect" enum. That's why
28+
// they are intentionally assigned negative values to filter them out at the
29+
// stage of embedding used aspects as device requirements to the executable.
30+
// We don't pass these internal aspects to the SYCL RT.
31+
enum SYCLInternalAspect : int32_t {
32+
fp_intrinsic_accuracy_high = -1,
33+
fp_intrinsic_accuracy_medium = -2,
34+
fp_intrinsic_accuracy_low = -3,
35+
fp_intrinsic_accuracy_sycl = -4,
36+
fp_intrinsic_accuracy_cuda = -5,
37+
};
38+
2639
class CGSYCLRuntime {
2740
protected:
2841
CodeGenModule &CGM;

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8122,10 +8122,11 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) {
81228122

81238123
void CodeGenModule::getFPAccuracyFuncAttributes(StringRef Name,
81248124
llvm::AttributeList &AttrList,
8125+
llvm::Metadata *&MD,
81258126
unsigned ID,
81268127
const llvm::Type *FuncType) {
81278128
llvm::AttrBuilder FuncAttrs(getLLVMContext());
8128-
getDefaultFunctionFPAccuracyAttributes(Name, FuncAttrs, ID, FuncType);
8129+
getDefaultFunctionFPAccuracyAttributes(Name, FuncAttrs, MD, ID, FuncType);
81298130
AttrList = llvm::AttributeList::get(
81308131
getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
81318132
}

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,7 +1589,8 @@ class CodeGenModule : public CodeGenTypeCache {
15891589
void moveLazyEmissionStates(CodeGenModule *NewBuilder);
15901590

15911591
void getFPAccuracyFuncAttributes(StringRef Name,
1592-
llvm::AttributeList &AttrList, unsigned ID,
1592+
llvm::AttributeList &AttrList,
1593+
llvm::Metadata *&MDs, unsigned ID,
15931594
const llvm::Type *FuncType);
15941595

15951596
private:
@@ -1789,7 +1790,7 @@ class CodeGenModule : public CodeGenTypeCache {
17891790

17901791
void getDefaultFunctionFPAccuracyAttributes(StringRef Name,
17911792
llvm::AttrBuilder &FuncAttrs,
1792-
unsigned ID,
1793+
llvm::Metadata *&MD, unsigned ID,
17931794
const llvm::Type *FuncType);
17941795

17951796
llvm::Metadata *CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map,
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ffp-builtin-accuracy=high:sin,sqrt -ffp-builtin-accuracy=medium:cos -ffp-builtin-accuracy=low:tan -ffp-builtin-accuracy=cuda:exp,acos -ffp-builtin-accuracy=sycl:log,asin -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck --check-prefix CHECK-FUNC %s
2+
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ffp-builtin-accuracy=high -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck --check-prefix CHECK-TU %s
3+
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ffp-builtin-accuracy=medium -ffp-builtin-accuracy=high:sin,sqrt -ffp-builtin-accuracy=medium:cos -ffp-builtin-accuracy=cuda:exp -ffp-builtin-accuracy=sycl:log -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck --check-prefix CHECK-MIX %s
4+
5+
// Tests that sycl_used_aspects metadata is attached to the fpbuiltin call based on -ffp-accuracy option.
6+
7+
#include "sycl.hpp"
8+
9+
extern "C" SYCL_EXTERNAL double sin(double);
10+
extern "C" SYCL_EXTERNAL double cos(double);
11+
extern "C" SYCL_EXTERNAL double tan(double);
12+
extern "C" SYCL_EXTERNAL double log(double);
13+
extern "C" SYCL_EXTERNAL double exp(double);
14+
extern "C" SYCL_EXTERNAL double acos(double);
15+
extern "C" SYCL_EXTERNAL double asin(double);
16+
extern "C" SYCL_EXTERNAL double sqrt(double);
17+
18+
using namespace sycl;
19+
20+
int main() {
21+
const unsigned array_size = 4;
22+
double Value = .5;
23+
queue deviceQueue;
24+
range<1> numOfItems{array_size};
25+
26+
// Kernel0 doesn't use math functions.
27+
deviceQueue.submit([&](handler& cgh) {
28+
cgh.parallel_for<class Kernel0>(numOfItems,
29+
[=](id<1> wiID) {
30+
(void)Value;
31+
});
32+
});
33+
34+
// Kernel1 uses high-accuracy sin.
35+
deviceQueue.submit([&](handler& cgh) {
36+
cgh.parallel_for<class Kernel1>(numOfItems,
37+
[=](id<1> wiID) {
38+
// CHECK-FUNC: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC:[0-9]+]]
39+
// CHECK-TU: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC:[0-9]+]]
40+
// CHECK-MIX: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC:[0-9]+]]
41+
(void)sin(Value);
42+
});
43+
});
44+
45+
deviceQueue.submit([&](handler& cgh) {
46+
cgh.parallel_for<class Kernel2>(numOfItems,
47+
[=](id<1> wiID) {
48+
// CHECK-FUNC: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC:[0-9]+]]
49+
// CHECK-TU: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
50+
// CHECK-MIX: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC:[0-9]+]]
51+
(void)cos(Value);
52+
});
53+
});
54+
55+
// Kernel3 uses low-accuracy tan.
56+
deviceQueue.submit([&](handler& cgh) {
57+
cgh.parallel_for<class Kernel3>(numOfItems,
58+
[=](id<1> wiID) {
59+
// CHECK-FUNC: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[LOW_ACC:[0-9]+]]
60+
// CHECK-TU: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
61+
// CHECK-MIX: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC]]
62+
(void)tan(Value);
63+
});
64+
});
65+
66+
// Kernel4 uses cuda-accuracy exp and sycl-accuracy log.
67+
deviceQueue.submit([&](handler& cgh) {
68+
cgh.parallel_for<class Kernel4>(numOfItems,
69+
[=](id<1> wiID) {
70+
// CHECK-FUNC: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[CUDA_ACC:[0-9]+]]
71+
// CHECK-FUNC: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[SYCL_ACC:[0-9]+]]
72+
// CHECK-TU: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
73+
// CHECK-TU: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
74+
// CHECK-MIX: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[CUDA_ACC:[0-9]+]]
75+
// CHECK-MIX: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[SYCL_ACC:[0-9]+]]
76+
(void)log(exp(Value));
77+
});
78+
});
79+
deviceQueue.wait();
80+
81+
// Kernel5 uses cuda-accuracy acos.
82+
deviceQueue.submit([&](handler& cgh) {
83+
cgh.parallel_for<class Kernel5>(numOfItems,
84+
[=](id<1> wiID) {
85+
// CHECK-FUNC: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[CUDA_ACC]]
86+
// CHECK-TU: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
87+
// CHECK-MIX: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC]]
88+
(void)acos(Value);
89+
});
90+
});
91+
92+
// Kernel6 uses sycl-accuracy asin.
93+
deviceQueue.submit([&](handler& cgh) {
94+
cgh.parallel_for<class Kernel6>(numOfItems,
95+
[=](id<1> wiID) {
96+
// CHECK-FUNC: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[SYCL_ACC]]
97+
// CHECK-TU: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
98+
// CHECK-MIX: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC]]
99+
(void)asin(Value);
100+
});
101+
});
102+
103+
// Kernel7 uses high-accuracy sqrt.
104+
deviceQueue.submit([&](handler& cgh) {
105+
cgh.parallel_for<class Kernel7>(numOfItems,
106+
[=](id<1> wiID) {
107+
// CHECK-FUNC: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
108+
// CHECK-TU: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
109+
// CHECK-MIX: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]]
110+
(void)sqrt(Value);
111+
});
112+
});
113+
return 0;
114+
}
115+
116+
// CHECK-FUNC: [[HIGH_ACC]] = !{i32 -1}
117+
// CHECK-FUNC: [[MEDIUM_ACC]] = !{i32 -2}
118+
// CHECK-FUNC: [[LOW_ACC]] = !{i32 -3}
119+
// CHECK-FUNC: [[CUDA_ACC]] = !{i32 -5}
120+
// CHECK-FUNC: [[SYCL_ACC]] = !{i32 -4}
121+
122+
// CHECK-TU: [[HIGH_ACC]] = !{i32 -1}
123+
124+
// CHECK-MIX: [[HIGH_ACC]] = !{i32 -1}
125+
// CHECK-MIX: [[MEDIUM_ACC]] = !{i32 -2}
126+
// CHECK-MIX: [[CUDA_ACC]] = !{i32 -5}
127+
// CHECK-MIX: [[SYCL_ACC]] = !{i32 -4}

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,13 @@ AspectsSetTy getAspectsUsedByInstruction(const Instruction &I,
255255
Result.insert(Aspects.begin(), Aspects.end());
256256
}
257257

258+
if (const MDNode *InstApsects = I.getMetadata("sycl_used_aspects")) {
259+
for (const MDOperand &MDOp : InstApsects->operands()) {
260+
const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue();
261+
Result.insert(cast<ConstantInt>(C)->getSExtValue());
262+
}
263+
}
264+
258265
return Result;
259266
}
260267

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; RUN: opt -passes=sycl-propagate-aspects-usage < %s -S | FileCheck %s
2+
;
3+
; Test checks that the pass is able to propagate information about aspects
4+
; used in the instruction through a call graph
5+
;
6+
; K1 K2
7+
; / \/ \
8+
; F1 F2 F3
9+
;
10+
; F1 doesn't use optional type and doesn't have instruction with attached 'sycl_used_aspects' metadata.
11+
; F2 uses optional A and has instruction with attached 'sycl_used_aspects' metadata.
12+
; F3 uses optional B and has instruction with attached 'sycl_used_aspects' metadata.
13+
14+
%Optional.A = type { i32 }
15+
%Optional.B = type { i32 }
16+
17+
; CHECK: spir_kernel void @kernel1() !sycl_used_aspects ![[#ID1:]]
18+
define spir_kernel void @kernel1() {
19+
call spir_func void @func1()
20+
call spir_func void @func2()
21+
ret void
22+
}
23+
24+
; CHECK: spir_kernel void @kernel2() !sycl_used_aspects ![[#ID2:]]
25+
define spir_kernel void @kernel2() {
26+
call spir_func void @func2()
27+
call spir_func void @func3()
28+
ret void
29+
}
30+
31+
; CHECK: spir_func void @func1() {
32+
define spir_func void @func1() {
33+
%tmp = alloca i32
34+
ret void
35+
}
36+
37+
declare void @llvm.fpbuiltin.f64()
38+
39+
; CHECK: spir_func void @func2() !sycl_used_aspects ![[#ID1]] {
40+
define spir_func void @func2() {
41+
%tmp1 = alloca %Optional.A
42+
call void @llvm.fpbuiltin.f64(), !sycl_used_aspects !3
43+
ret void
44+
}
45+
46+
; CHECK: spir_func void @func3() !sycl_used_aspects ![[#ID3:]] {
47+
define spir_func void @func3() {
48+
%tmp = alloca %Optional.B
49+
call void @llvm.fpbuiltin.f64(), !sycl_used_aspects !4
50+
ret void
51+
}
52+
53+
!sycl_types_that_use_aspects = !{!0, !1}
54+
!0 = !{!"Optional.A", i32 1}
55+
!1 = !{!"Optional.B", i32 2}
56+
57+
!sycl_aspects = !{!2}
58+
!2 = !{!"fp64", i32 6}
59+
!3 = !{i32 -1}
60+
!4 = !{i32 -2}
61+
62+
; CHECK: ![[#ID1]] = !{i32 1, i32 -1}
63+
; CHECK: ![[#ID2]] = !{i32 1, i32 -1, i32 2, i32 -2}
64+
; CHECK: ![[#ID3]] = !{i32 2, i32 -2}
65+
66+

0 commit comments

Comments
 (0)