Skip to content

Commit fc328cc

Browse files
authored
[SYCL] Free functions and dynamic linking fixes for CUDA/HIP (#17899)
1 parent a89ae41 commit fc328cc

22 files changed

+14
-76
lines changed

llvm/lib/SYCLPostLink/ModuleSplitter.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,9 @@ bool isGenericBuiltin(StringRef FName) {
121121
}
122122

123123
bool isKernel(const Function &F) {
124-
return F.getCallingConv() == CallingConv::SPIR_KERNEL ||
125-
F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
124+
const auto CC = F.getCallingConv();
125+
return CC == CallingConv::SPIR_KERNEL || CC == CallingConv::AMDGPU_KERNEL ||
126+
CC == CallingConv::PTX_Kernel;
126127
}
127128

128129
bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) {
@@ -697,9 +698,13 @@ static bool mustPreserveGV(const GlobalValue &GV) {
697698
// kernels which are the entry points from host code to device code) that
698699
// cannot be imported which also means that there is no point in having it
699700
// visible outside of the current module.
700-
if (AllowDeviceImageDependencies)
701-
return F->getCallingConv() == CallingConv::SPIR_KERNEL ||
702-
canBeImportedFunction(*F);
701+
if (AllowDeviceImageDependencies) {
702+
const auto CC = F->getCallingConv();
703+
const bool SpirOrGPU = CC == CallingConv::SPIR_KERNEL ||
704+
CC == CallingConv::AMDGPU_KERNEL ||
705+
CC == CallingConv::PTX_Kernel;
706+
return SpirOrGPU || canBeImportedFunction(*F);
707+
}
703708

704709
// Otherwise, we are being even more aggressive: SYCL modules are expected
705710
// to be self-contained, meaning that they have no external dependencies.

sycl/test-e2e/DeviceImageDependencies/NewOffloadDriver/free_function_kernels.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
1+
// UNSUPPORTED: true
2+
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17812
3+
14
// Ensure -fsycl-allow-device-image-dependencies can work with free function kernels.
25

36
// REQUIRES: aspect-usm_shared_allocations
47
// RUN: %{build} -o %t.out --offload-new-driver -fsycl-allow-device-image-dependencies
58
// RUN: %{run} %t.out
69

7-
// The name mangling for free function kernels currently does not work with PTX.
8-
// UNSUPPORTED: target-nvidia
9-
// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends.
10-
11-
// XFAIL: target-amd
12-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742
13-
1410
#include <iostream>
1511
#include <sycl/detail/core.hpp>
1612
#include <sycl/ext/oneapi/free_function_queries.hpp>

sycl/test-e2e/DeviceImageDependencies/free_function_kernels.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,6 @@
44
// RUN: %{build} -o %t.out -fsycl-allow-device-image-dependencies
55
// RUN: %{run} %t.out
66

7-
// The name mangling for free function kernels currently does not work with PTX.
8-
// UNSUPPORTED: cuda
9-
10-
// XFAIL: hip
11-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15742
12-
137
#include <iostream>
148
#include <sycl/detail/core.hpp>
159
#include <sycl/ext/oneapi/free_function_queries.hpp>

sycl/test-e2e/Graph/Explicit/free_function_kernels.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
#define GRAPH_E2E_EXPLICIT
129

sycl/test-e2e/Graph/Explicit/work_group_memory_free_function.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77

8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
10-
118
#define GRAPH_E2E_EXPLICIT
129

1310
#include "../Inputs/work_group_memory_free_function.cpp"

sycl/test-e2e/Graph/RecordReplay/free_function_kernels.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
#define GRAPH_E2E_RECORD_REPLAY
129

sycl/test-e2e/Graph/RecordReplay/work_group_memory_free_function.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77

8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
10-
118
#define GRAPH_E2E_RECORD_REPLAY
129

1310
#include "../Inputs/work_group_memory_free_function.cpp"

sycl/test-e2e/Graph/Update/FreeFunctionKernels/dyn_work_group_memory_basic.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating dynamic_work_group_memory with a new size.
129

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_before_finalize.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a graph node before finalization
129

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_multiple_exec_graphs.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests creating multiple executable graphs from the same modifiable graph and
129
// only updating one of them.

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ordering.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests that updating a graph is ordered with respect to previous executions of
129
// the graph which may be in flight.

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a graph node using index-based explicit update
129

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_3D.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a 3D ND-Range graph kernel node using index-based explicit
129
// update

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_double_update.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a graph node using index-based explicit update
129

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_nodes.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a single dynamic parameter which is registered with multiple
129
// graph nodes

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_multiple_params.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating multiple parameters to a single graph node using index-based
129
// explicit update

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_ptr_subgraph.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a graph node in an executable graph that was used as a
129
// subgraph node in another executable graph is not reflected in the graph

sycl/test-e2e/Graph/Update/FreeFunctionKernels/update_with_indices_scalar.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7-
//
8-
// XFAIL: cuda
9-
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/16004
107

118
// Tests updating a graph node scalar argument using index-based explicit update
129

sycl/test-e2e/KernelAndProgram/free_function_apis.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// RUN: %{build} -o %t.out
33
// RUN: %{run} %t.out
44

5-
// The name mangling for free function kernels currently does not work with PTX.
6-
// UNSUPPORTED: cuda
7-
85
#include <iostream>
96
#include <sycl/detail/core.hpp>
107
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>

sycl/test-e2e/KernelAndProgram/free_function_kernels.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// RUN: %{build} -o %t.out
33
// RUN: %{run} %t.out
44

5-
// The name mangling for free function kernels currently does not work with PTX.
6-
// UNSUPPORTED: cuda
7-
85
// This test tests free function kernel code generation and execution.
96

107
#include <iostream>

sycl/test-e2e/WorkGroupMemory/reduction_free_function.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// RUN: %{build} -o %t.out
33
// RUN: %{run} %t.out
44

5-
// UNSUPPORTED: cuda
6-
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16004
7-
85
#include "common_free_function.hpp"
96

107
// Basic usage reduction test using free function kernels.

sycl/test/e2e_test_requirements/no-unsupported-without-info.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
// tests to match the required format and in that case you should just update
5555
// (i.e. reduce) the number and the list below.
5656
//
57-
// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 276
57+
// NUMBER-OF-UNSUPPORTED-WITHOUT-INFO: 273
5858
//
5959
// List of improperly UNSUPPORTED tests.
6060
// Remove the CHECK once the test has been properly UNSUPPORTED.
@@ -93,7 +93,6 @@
9393
// CHECK-NEXT: Basic/kernel_info_attr.cpp
9494
// CHECK-NEXT: Basic/submit_time.cpp
9595
// CHECK-NEXT: DeviceImageDependencies/dynamic.cpp
96-
// CHECK-NEXT: DeviceImageDependencies/free_function_kernels.cpp
9796
// CHECK-NEXT: DeviceImageDependencies/math_device_lib.cpp
9897
// CHECK-NEXT: DeviceImageDependencies/objects.cpp
9998
// CHECK-NEXT: DeviceImageDependencies/singleDynamicLibrary.cpp
@@ -205,8 +204,6 @@
205204
// CHECK-NEXT: InvokeSimd/Regression/ImplicitSubgroup/call_vadd_1d_spill.cpp
206205
// CHECK-NEXT: InvokeSimd/Regression/call_vadd_1d_spill.cpp
207206
// CHECK-NEXT: KernelAndProgram/cache-build-result.cpp
208-
// CHECK-NEXT: KernelAndProgram/free_function_apis.cpp
209-
// CHECK-NEXT: KernelAndProgram/free_function_kernels.cpp
210207
// CHECK-NEXT: KernelAndProgram/kernel-bundle-merge-options-env.cpp
211208
// CHECK-NEXT: KernelAndProgram/kernel-bundle-merge-options.cpp
212209
// CHECK-NEXT: KernelAndProgram/level-zero-static-link-flow.cpp

0 commit comments

Comments
 (0)