Skip to content

Commit 39515bd

Browse files
committed
Merge remote-tracking branch 'upstream/sycl' into sycl
2 parents 4657e3b + 3c3b485 commit 39515bd

File tree

169 files changed

+2968
-4717
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

169 files changed

+2968
-4717
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5795,7 +5795,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
57955795
}
57965796

57975797
// Enable local accessor to shared memory pass for SYCL.
5798-
if (isa<BackendJobAction>(JA) && IsSYCL) {
5798+
if (isa<BackendJobAction>(JA) && IsSYCLOffloadDevice &&
5799+
(Triple.isNVPTX() || Triple.isAMDGCN())) {
57995800
CmdArgs.push_back("-mllvm");
58005801
CmdArgs.push_back("-sycl-enable-local-accessor");
58015802
}
@@ -5959,7 +5960,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
59595960
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
59605961
// for sampling, overhead of call arc collection is way too high and there's
59615962
// no way to collect the output.
5962-
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
5963+
// Disable for SPIR-V compilations as well.
5964+
if (!Triple.isNVPTX() && !Triple.isAMDGCN() && !Triple.isSPIR())
59635965
addPGOAndCoverageFlags(TC, C, D, Output, Args, SanitizeArgs, CmdArgs);
59645966

59655967
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -427,9 +427,6 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
427427
CmdArgs.push_back("--return-at-end");
428428
} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
429429
// Map the -O we received to -O{0,1,2,3}.
430-
//
431-
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
432-
// default, so it may correspond more closely to the spirit of clang -O2.
433430

434431
// -O3 seems like the least-bad option when -Osomething is specified to
435432
// clang but it isn't handled below.
@@ -451,9 +448,9 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
451448
}
452449
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
453450
} else {
454-
// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
455-
// to no optimizations, but ptxas's default is -O3.
456-
CmdArgs.push_back("-O0");
451+
// If no -O was passed, pass -O3 to ptxas -- this makes ptxas's
452+
// optimization level the same as the ptxjitcompiler.
453+
CmdArgs.push_back("-O3");
457454
}
458455
if (DIKind == DebugDirectivesOnly)
459456
CmdArgs.push_back("-lineinfo");

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,6 @@ void SYCL::fpga::BackendCompiler::constructOpenCLAOTCommand(
335335
// will be compiled to an aocx file.
336336
InputInfoList ForeachInputs;
337337
InputInfoList FPGADepFiles;
338-
StringRef CreatedReportName;
339338
ArgStringList CmdArgs{"-device=fpga_fast_emu"};
340339

341340
for (const auto &II : Inputs) {
@@ -652,8 +651,6 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
652651
default:
653652
DAL->append(A);
654653
break;
655-
case options::OPT_fcoverage_mapping:
656-
break;
657654
}
658655
}
659656
}

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -636,9 +636,16 @@ class DiagDeviceFunction : public RecursiveASTVisitor<DiagDeviceFunction> {
636636
}
637637
} else if (!SemaRef.getLangOpts().SYCLAllowFuncPtr &&
638638
!e->isTypeDependent() &&
639-
!isa<CXXPseudoDestructorExpr>(e->getCallee()))
640-
SemaRef.Diag(e->getExprLoc(), diag::err_sycl_restrict)
641-
<< Sema::KernelCallFunctionPointer;
639+
!isa<CXXPseudoDestructorExpr>(e->getCallee())) {
640+
bool MaybeConstantExpr = false;
641+
Expr *NonDirectCallee = e->getCallee();
642+
if (!NonDirectCallee->isValueDependent())
643+
MaybeConstantExpr =
644+
NonDirectCallee->isCXX11ConstantExpr(SemaRef.getASTContext());
645+
if (!MaybeConstantExpr)
646+
SemaRef.Diag(e->getExprLoc(), diag::err_sycl_restrict)
647+
<< Sema::KernelCallFunctionPointer;
648+
}
642649
return true;
643650
}
644651

clang/test/Driver/cuda-external-tools.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@
4040
// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
4141
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
4242

43-
// Regular compile without -O. This should result in us passing -O0 to ptxas.
43+
// Regular compile without -O. This should result in us passing -O3 to ptxas.
4444
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
4545
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
46-
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
46+
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
4747

4848
// Regular compiles with -Os and -Oz. For lack of a better option, we map
4949
// these to ptxas -O3.
@@ -75,7 +75,7 @@
7575
// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
7676
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
7777
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
78-
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
78+
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
7979
// Check that we still pass -c when generating relocatable device code.
8080
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
8181
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \

clang/test/Driver/sycl-local-accessor-opt.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
// REQUIRES: clang-driver
44

55
// RUN: %clang -fsycl -### %s 2>&1 \
6-
// RUN: | FileCheck -check-prefix=CHECK-NO-OPT %s
7-
// CHECK-NO-OPT-NOT: "-sycl-enable-local-accessor"
6+
// RUN: | FileCheck -check-prefix=OPT-CHECK %s
7+
8+
// RUN: %clang -fsycl -S -### %s 2>&1 \
9+
// RUN: | FileCheck -check-prefix=OPT-CHECK %s
10+
// OPT-CHECK-NOT: "-sycl-enable-local-accessor"
811

912
// RUN: %clang -fsycl -fsycl-targets=nvptx64-nvidia-cuda -### %s 2>&1 \
1013
// RUN: | FileCheck %s

clang/test/Driver/sycl-offload.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,19 @@
3030
/// Check that -fcoverage-mapping is disabled for device
3131
// RUN: %clang -### -fsycl -fprofile-instr-generate -fcoverage-mapping -target x86_64-unknown-linux-gnu -c %s 2>&1 \
3232
// RUN: | FileCheck -check-prefix=CHECK_COVERAGE_MAPPING %s
33-
// CHECK_COVERAGE_MAPPING: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fprofile-instrument=clang"
33+
// CHECK_COVERAGE_MAPPING: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}}
34+
// CHECK_COVERAGE_MAPPING-NOT: "-fprofile-instrument=clang"
3435
// CHECK_COVERAGE_MAPPING-NOT: "-fcoverage-mapping"
3536
// CHECK_COVERAGE_MAPPING: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-fsycl-is-host"{{.*}} "-fprofile-instrument=clang"{{.*}} "-fcoverage-mapping"{{.*}}
3637

38+
/// Check that -fprofile-arcs and -ftest-coverage are disabled for device
39+
// RUN: %clang -### -fsycl -fprofile-arcs -ftest-coverage -target x86_64-unknown-linux-gnu -c %s 2>&1 \
40+
// RUN: | FileCheck -check-prefix=CHECK_TEST_COVERAGE %s
41+
// CHECK_TEST_COVERAGE: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}}
42+
// CHECK_TEST_COVERAGE-NOT: "-ftest-coverage"
43+
// CHECK_TEST_COVERAGE-NOT: "-fprofile-arcs"
44+
// CHECK_TEST_COVERAGE: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-fsycl-is-host"{{.*}} "-ftest-coverage" "-fprofile-arcs"
45+
3746
/// Check for PIC for device wrap compilation when using -shared or -fPIC
3847
// RUN: %clangxx -### -fsycl -target x86_64-unknown-linux-gnu -shared %s 2>&1 \
3948
// RUN: | FileCheck -check-prefix=CHECK_SHARED %s
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -fsyntax-only -verify -sycl-std=2020 -std=c++17 %s
2+
3+
// This test checks that the compiler doesn't emit an error when indirect call
4+
// was made through a function pointer that is constant expression, and makes
5+
// sure that the error is emitted when a function pointer is not a constant
6+
// expression.
7+
8+
void t() {}
9+
10+
constexpr auto F = t;
11+
const auto F1 = t;
12+
13+
typedef void (*SomeFunc)();
14+
15+
constexpr SomeFunc foo() { return t; }
16+
17+
const SomeFunc foo1() { return t; }
18+
19+
void bar1(const SomeFunc fptr) {
20+
fptr();
21+
}
22+
23+
template <auto f> void fooNTTP() { f(); }
24+
25+
__attribute__((sycl_device)) void bar() {
26+
// OK
27+
constexpr auto f = t;
28+
f();
29+
const auto f1 = t;
30+
// expected-error@+1 {{SYCL kernel cannot call through a function pointer}}
31+
f1();
32+
auto f2 = t;
33+
// expected-error@+1 {{SYCL kernel cannot call through a function pointer}}
34+
f2();
35+
36+
// OK
37+
F();
38+
// expected-error@+1 {{SYCL kernel cannot call through a function pointer}}
39+
F1();
40+
41+
constexpr auto ff = foo();
42+
ff();
43+
const auto ff1 = foo();
44+
// expected-error@+1 {{SYCL kernel cannot call through a function pointer}}
45+
ff1();
46+
const auto fff = foo1();
47+
// expected-error@+1 {{SYCL kernel cannot call through a function pointer}}
48+
fff();
49+
50+
fooNTTP<t>();
51+
}

libclc/amdgcn-amdhsa/libspirv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ math/log.cl
3737
math/log10.cl
3838
math/log1p.cl
3939
math/logb.cl
40+
math/mangle_common.h
4041
math/modf.cl
4142
math/nextafter.cl
4243
math/pow.cl

libclc/amdgcn-amdhsa/libspirv/math/frexp.cl

Lines changed: 79 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,81 +6,104 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "mangle_common.h"
910
#include <spirv/spirv.h>
1011
#include <utils.h>
1112

1213
double __ocml_frexp_f64(double, int *);
1314
float __ocml_frexp_f32(float, int *);
1415

15-
_CLC_OVERLOAD _CLC_DEF float __clc_spirv_ocl_frexp(float x, private int *ep) {
16-
return __ocml_frexp_f32(x, ep);
17-
}
16+
#define FUNCNAME(IN, OUT) \
17+
__CLC_XCONCAT(__CLC_XCONCAT(_Z17__spirv_ocl_frexp, IN), OUT)
18+
#define VEC_TYPE(T, N) __CLC_XCONCAT(__CLC_XCONCAT(__CLC_XCONCAT(Dv, N), _), T)
19+
#define VEC_FUNCNAME(N, MANGLED_IN_TYPE, MANGLED_PTR, MANGLED_OUT_TYPE) \
20+
FUNCNAME(VEC_TYPE(MANGLED_IN_TYPE, N), \
21+
__CLC_XCONCAT(MANGLED_PTR, VEC_TYPE(MANGLED_OUT_TYPE, N)))
22+
23+
#define MANUALLY_MANGLED_FREXP_IMPL(ADDRSPACE, BUILTIN, ARG1_TYPE, \
24+
MANGLED_ARG1_TYPE, MANGLED_ARG2_TYPE) \
25+
_CLC_DEF ARG1_TYPE FUNCNAME(MANGLED_ARG1_TYPE, MANGLED_ARG2_TYPE)( \
26+
ARG1_TYPE x, __attribute((address_space(ADDRSPACE))) int *ptr) { \
27+
int stack_iptr; \
28+
ARG1_TYPE ret = BUILTIN(x, &stack_iptr); \
29+
*ptr = stack_iptr; \
30+
return ret; \
31+
}
32+
33+
#define __CLC_FREXP(BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE) \
34+
MANUALLY_MANGLED_FREXP_IMPL(0, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, Pi) \
35+
MANUALLY_MANGLED_FREXP_IMPL(1, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, \
36+
PU3AS1i) \
37+
MANUALLY_MANGLED_FREXP_IMPL(3, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, \
38+
PU3AS3i) \
39+
MANUALLY_MANGLED_FREXP_IMPL(5, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, PU3AS5i)
40+
41+
#define FNAME_GENERIC(N) VEC_FUNCNAME(N, f, P, i)
42+
#define FNAME_GLOBAL(N) VEC_FUNCNAME(N, f, PU3AS1, i)
43+
#define FNAME_LOCAL(N) VEC_FUNCNAME(N, f, PU3AS3, i)
44+
#define FNAME_PRIVATE(N) VEC_FUNCNAME(N, f, PU3AS5, i)
45+
46+
__CLC_FREXP(__ocml_frexp_f32, float, f)
47+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, Pi), FNAME_GENERIC, float, 0, int)
48+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, PU3AS1i), FNAME_GLOBAL, float, 1,
49+
int)
50+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, PU3AS3i), FNAME_LOCAL, float, 3,
51+
int)
52+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, PU3AS5i), FNAME_PRIVATE, float, 5,
53+
int)
54+
55+
#undef FNAME_GENERIC
56+
#undef FNAME_GLOBAL
57+
#undef FNAME_LOCAL
58+
#undef FNAME_PRIVATE
1859

1960
#ifdef cl_khr_fp64
2061

2162
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
2263

23-
_CLC_OVERLOAD _CLC_DEF double __clc_spirv_ocl_frexp(double x, private int *ep) {
24-
return __ocml_frexp_f64(x, ep);
25-
}
64+
#define FNAME_GENERIC(N) VEC_FUNCNAME(N, d, P, i)
65+
#define FNAME_GLOBAL(N) VEC_FUNCNAME(N, d, PU3AS1, i)
66+
#define FNAME_LOCAL(N) VEC_FUNCNAME(N, d, PU3AS3, i)
67+
#define FNAME_PRIVATE(N) VEC_FUNCNAME(N, d, PU3AS5, i)
68+
69+
__CLC_FREXP(__ocml_frexp_f64, double, d)
70+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, Pi), FNAME_GENERIC, double, 0,
71+
int)
72+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, PU3AS1i), FNAME_GLOBAL, double, 1,
73+
int)
74+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, PU3AS3i), FNAME_LOCAL, double, 3,
75+
int)
76+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, PU3AS5i), FNAME_PRIVATE, double,
77+
5, int)
78+
79+
#undef FNAME_GENERIC
80+
#undef FNAME_GLOBAL
81+
#undef FNAME_LOCAL
82+
#undef FNAME_PRIVATE
2683

2784
#endif
2885

2986
#ifdef cl_khr_fp16
3087

3188
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
3289

33-
_CLC_OVERLOAD _CLC_DEF half __clc_spirv_ocl_frexp(half x, private int *ep) {
34-
float t = x;
35-
return __ocml_frexp_f32(t, ep);
36-
}
90+
#define FNAME_GENERIC(N) VEC_FUNCNAME(N, Dh, P, i)
91+
#define FNAME_GLOBAL(N) VEC_FUNCNAME(N, Dh, PU3AS1, i)
92+
#define FNAME_LOCAL(N) VEC_FUNCNAME(N, Dh, PU3AS3, i)
93+
#define FNAME_PRIVATE(N) VEC_FUNCNAME(N, Dh, PU3AS5, i)
3794

38-
#endif
39-
40-
#define __CLC_ADDRESS_SPACE private
41-
#define __CLC_GENTYPE float
42-
#include <frexp.inc>
43-
#undef __CLC_GENTYPE
44-
#ifdef cl_khr_fp64
45-
#define __CLC_GENTYPE double
46-
#include <frexp.inc>
47-
#undef __CLC_GENTYPE
48-
#endif
49-
#ifdef cl_khr_fp16
50-
#define __CLC_GENTYPE half
51-
#include <frexp.inc>
52-
#undef __CLC_GENTYPE
53-
#endif
54-
#undef __CLC_ADDRESS_SPACE
95+
__CLC_FREXP(__ocml_frexp_f32, half, Dh)
96+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, Pi), FNAME_GENERIC, half, 0, int)
97+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, PU3AS1i), FNAME_GLOBAL, half, 1,
98+
int)
99+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, PU3AS3i), FNAME_LOCAL, half, 3,
100+
int)
101+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, PU3AS5i), FNAME_PRIVATE, half, 5,
102+
int)
55103

56-
#define __CLC_ADDRESS_SPACE global
57-
#define __CLC_GENTYPE float
58-
#include <frexp.inc>
59-
#undef __CLC_GENTYPE
60-
#ifdef cl_khr_fp64
61-
#define __CLC_GENTYPE double
62-
#include <frexp.inc>
63-
#undef __CLC_GENTYPE
64-
#endif
65-
#ifdef cl_khr_fp16
66-
#define __CLC_GENTYPE half
67-
#include <frexp.inc>
68-
#undef __CLC_GENTYPE
69-
#endif
70-
#undef __CLC_ADDRESS_SPACE
104+
#undef FNAME_GENERIC
105+
#undef FNAME_GLOBAL
106+
#undef FNAME_LOCAL
107+
#undef FNAME_PRIVATE
71108

72-
#define __CLC_ADDRESS_SPACE local
73-
#define __CLC_GENTYPE float
74-
#include <frexp.inc>
75-
#undef __CLC_GENTYPE
76-
#ifdef cl_khr_fp64
77-
#define __CLC_GENTYPE double
78-
#include <frexp.inc>
79-
#undef __CLC_GENTYPE
80-
#endif
81-
#ifdef cl_khr_fp16
82-
#define __CLC_GENTYPE half
83-
#include <frexp.inc>
84-
#undef __CLC_GENTYPE
85109
#endif
86-
#undef __CLC_ADDRESS_SPACE

0 commit comments

Comments
 (0)