Skip to content

Commit 1499836

Browse files
author
Hugh Delaney
committed
Merge remote-tracking branch 'Jack/bf16-cvt-ext' into tf32-joint-matrix
2 parents 618c807 + 8a29c44 commit 1499836

File tree

23 files changed

+764
-110
lines changed

23 files changed

+764
-110
lines changed

buildbot/dependency.conf

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ ocl_cpu_rt_ver_win=2021.13.11.0.23
77
# https://github.com/intel/compute-runtime/releases/tag/22.09.22577
88
ocl_gpu_rt_ver=22.09.22577
99
# Same GPU driver supports Level Zero and OpenCL
10-
# https://downloadmirror.intel.com/721124/igfx_win_101.1340.zip
11-
ocl_gpu_rt_ver_win=101.1340
10+
# https://downloadmirror.intel.com/723911/igfx_win_101.1404.zip
11+
ocl_gpu_rt_ver_win=101.1404
1212
intel_sycl_ver=build
1313

1414
# TBB binaries can be built from sources following instructions under
@@ -25,14 +25,14 @@ ocl_fpga_emu_ver=2021.13.11.0.23
2525
ocl_fpga_emu_ver_win=2021.13.11.0.23
2626
fpga_ver=20211014_000004
2727
fpga_ver_win=20211014_000004
28-
# https://downloadmirror.intel.com/721124/igfx_win_101.1340.zip
29-
ocloc_ver_win=101.1340
28+
# https://downloadmirror.intel.com/723911/igfx_win_101.1404.zip
29+
ocloc_ver_win=101.1404
3030

3131
[DRIVER VERSIONS]
3232
cpu_driver_lin=2021.13.11.0.23
3333
cpu_driver_win=2021.13.11.0.23
3434
gpu_driver_lin=22.09.22577
35-
gpu_driver_win=101.1340
35+
gpu_driver_win=101.1404
3636
fpga_driver_lin=2021.13.11.0.23
3737
fpga_driver_win=2021.13.11.0.23
3838
# NVidia CUDA driver

clang/lib/Driver/Driver.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3013,10 +3013,19 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
30133013
// TODO: The static archive processing for SYCL is done in a different
30143014
// manner than the OpenMP processing. We should try and refactor this
30153015
// to use the OpenMP flow (adding -l<name> to the llvm-link step)
3016-
auto resolveStaticLib = [&](StringRef LibName) -> bool {
3016+
auto resolveStaticLib = [&](StringRef LibName, bool IsStatic) -> bool {
30173017
if (!LibName.startswith("-l"))
30183018
return false;
30193019
for (auto LPath : LibPaths) {
3020+
if (!IsStatic) {
3021+
// Current linking state is dynamic. We will first check for the
3022+
// shared object and not pull in the static library if it is found.
3023+
SmallString<128> SoLibName(LPath);
3024+
llvm::sys::path::append(SoLibName,
3025+
Twine("lib" + LibName.substr(2) + ".so").str());
3026+
if (llvm::sys::fs::exists(SoLibName))
3027+
return false;
3028+
}
30203029
SmallString<128> FullName(LPath);
30213030
llvm::sys::path::append(FullName,
30223031
Twine("lib" + LibName.substr(2) + ".a").str());
@@ -3029,6 +3038,7 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
30293038
};
30303039
for (const auto *A : Args) {
30313040
std::string FileName = A->getAsString(Args);
3041+
static bool IsLinkStateStatic(Args.hasArg(options::OPT_static));
30323042
auto addLibArg = [&](StringRef LibName) -> bool {
30333043
if (isStaticArchiveFile(LibName) ||
30343044
(IncludeObj && isObjectFile(LibName.str()))) {
@@ -3088,7 +3098,20 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
30883098
LibArgs.push_back(Args.MakeArgString(V));
30893099
return;
30903100
}
3091-
resolveStaticLib(V);
3101+
if (optionMatches("-Bstatic", V.str()) ||
3102+
optionMatches("-dn", V.str()) ||
3103+
optionMatches("-non_shared", V.str()) ||
3104+
optionMatches("-static", V.str())) {
3105+
IsLinkStateStatic = true;
3106+
return;
3107+
}
3108+
if (optionMatches("-Bdynamic", V.str()) ||
3109+
optionMatches("-dy", V.str()) ||
3110+
optionMatches("-call_shared", V.str())) {
3111+
IsLinkStateStatic = false;
3112+
return;
3113+
}
3114+
resolveStaticLib(V, IsLinkStateStatic);
30923115
};
30933116
if (Value[0] == '@') {
30943117
// Found a response file, we want to expand contents to try and
@@ -3128,7 +3151,7 @@ getLinkerArgs(Compilation &C, DerivedArgList &Args, bool IncludeObj = false) {
31283151
continue;
31293152
}
31303153
if (A->getOption().matches(options::OPT_l))
3131-
resolveStaticLib(A->getAsString(Args));
3154+
resolveStaticLib(A->getAsString(Args), IsLinkStateStatic);
31323155
}
31333156
return LibArgs;
31343157
}

clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,15 @@ int main() {
100100
// CHECK-SAME: %"struct.cl::sycl::range"* noundef byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+6]],
101101
// CHECK-SAME: %"struct.cl::sycl::range"* noundef byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+7]],
102102
// CHECK-SAME: %"struct.cl::sycl::id"* noundef byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+8]])
103-
// CHECK-SAME: !kernel_arg_runtime_aligned !5
103+
// CHECK-SAME: !kernel_arg_runtime_aligned ![[#RTALIGNED1:]]
104104

105105
// Check kernel_readOnlyAcc parameters
106106
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_readOnlyAcc
107107
// CHECK-SAME: i32 addrspace(1)* noundef readonly align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]],
108108
// CHECK-SAME: %"struct.cl::sycl::range"* noundef byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+1]],
109109
// CHECK-SAME: %"struct.cl::sycl::range"* noundef byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+2]],
110110
// CHECK-SAME: %"struct.cl::sycl::id"* noundef byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+3]]
111-
// CHECK-SAME: !kernel_arg_runtime_aligned !14
111+
// CHECK-SAME: !kernel_arg_runtime_aligned ![[#RTALIGNED2:]]
112112

113113
// Check kernel_B parameters
114114
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_B
@@ -130,7 +130,7 @@ int main() {
130130
// CHECK-SAME: %"struct.cl::sycl::range.5"* noundef byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+1]],
131131
// CHECK-SAME: %"struct.cl::sycl::range.5"* noundef byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+2]],
132132
// CHECK-SAME: %"struct.cl::sycl::id.6"* noundef byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+3]]
133-
// CHECK-SAME: !kernel_arg_runtime_aligned !14
133+
// CHECK-SAME: !kernel_arg_runtime_aligned ![[#RTALIGNED2]]
134134

135135
// Check kernel_acc_raw_ptr parameters
136136
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_acc_raw_ptr
@@ -139,7 +139,7 @@ int main() {
139139
// CHECK-SAME: %"struct.cl::sycl::range"* noundef byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+2]],
140140
// CHECK-SAME: %"struct.cl::sycl::id"* noundef byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+3]]
141141
// CHECK-SAME: i32 addrspace(1)* noundef align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]]
142-
// CHECK-SAME: !kernel_arg_runtime_aligned !26
142+
// CHECK-SAME: !kernel_arg_runtime_aligned ![[#RTALIGNED3:]]
143143

144144
// Check esimd_kernel_with_acc parameters
145145
// CHECK: define {{.*}}spir_kernel void @{{.*}}esimd_kernel_with_acc
@@ -148,6 +148,6 @@ int main() {
148148
// Check kernel-arg-runtime-aligned metadata.
149149
// The value of any metadata element is 1 for any kernel arguments
150150
// that corresponds to the base pointer of an accessor and 0 otherwise.
151-
// CHECK: !5 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false}
152-
// CHECK: !14 = !{i1 true, i1 false, i1 false, i1 false}
153-
// CHECK: !26 = !{i1 true, i1 false, i1 false, i1 false, i1 false}
151+
// CHECK: ![[#RTALIGNED1]] = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false}
152+
// CHECK: ![[#RTALIGNED2]] = !{i1 true, i1 false, i1 false, i1 false}
153+
// CHECK: ![[#RTALIGNED3]] = !{i1 true, i1 false, i1 false, i1 false, i1 false}

clang/test/Driver/sycl-offload-static-lib-2.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,14 @@
3535
// STATIC_LIB: ld{{.*}} "{{.*}}_lib.{{(a|lo)}}" "[[HOSTOBJ]]"
3636

3737
// Test using -l<name> style for passing libraries.
38+
// RUN: mkdir -p %t_dir
39+
// RUN: touch %t_dir/liblin64.so
3840
// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \
3941
// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB,STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown
42+
// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -static -L%t_dir -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \
43+
// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB,STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown
44+
// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -Xlinker -Bstatic -L%t_dir -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \
45+
// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB,STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown
4046
// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=nvptx64-nvidia-cuda -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \
4147
// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB,STATIC_L_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50
4248
// STATIC_L_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-inputs=[[INPUTO:.+\.o]]" "-outputs=[[HOSTOBJ:.+\.o]],{{.+\.o}}"
@@ -49,6 +55,9 @@
4955
// STATIC_L_LIB: ld{{.*}} "-llin64" "[[HOSTOBJ]]"
5056

5157
// non-fat libraries should not trigger the unbundling step.
58+
// presence of shared object should not trigger unbundling step.
59+
// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -L%t_dir -L%S/Inputs/SYCL -llin64 -### 2>&1 \
60+
// RUN: | FileCheck %s -check-prefixes=NO_STATIC_UNBUNDLE
5261
// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -lc -lm -ldl -### 2>&1 \
5362
// RUN: | FileCheck %s -check-prefixes=NO_STATIC_UNBUNDLE
5463
// NO_STATIC_UNBUNDLE-NOT: clang-offload-bundler{{.*}} "-type=aoo" {{.*}} "-inputs={{.*}}lib{{.*}}.a"

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -855,13 +855,13 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
855855
// Abs, Neg bf16, bf16x2
856856
//
857857

858-
def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $dst;", Int16Regs,
858+
def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
859859
Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>;
860-
def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $dst;", Int32Regs,
860+
def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
861861
Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>;
862-
def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $dst;", Int16Regs,
862+
def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
863863
Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>;
864-
def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $dst;", Int32Regs,
864+
def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
865865
Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>;
866866

867867
//

sycl/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,5 +414,3 @@ add_custom_target(deploy-sycl-toolchain
414414

415415
# SYCL Runtime documentation
416416
add_subdirectory(doc)
417-
418-
add_subdirectory(examples)

0 commit comments

Comments
 (0)