Skip to content

[SYCL][Fusion] Improve kernel fusion e2e tests #14366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions sycl/test-e2e/KernelFusion/cancel_fusion.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s

// Test cancel fusion

// As fusion is cancelled, this test launches two kernels.
// CHECK-COUNT-2: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/complete_fusion.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s

// Test complete fusion without any internalization

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/cooperative_kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s

// Test cooperative kernels are not fused
Expand Down
26 changes: 6 additions & 20 deletions sycl/test-e2e/KernelFusion/different_nd_ranges.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 \
// RUN: %{run} %t.out 2>&1 | FileCheck %s
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: env SYCL_PI_TRACE=2 env SYCL_RT_WARNING_LEVEL=1 \
// RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:64 %{run} %t.out 2>&1 \
// RUN: | FileCheck %s --implicit-check-not "ERROR: JIT compilation for kernel fusion failed with message:"

// Test complete fusion of kernels with different ND-ranges.

// Kernels with different ND-ranges should be fused.
// CHECK-NOT: Cannot fuse kernels with different offsets or local sizes
// CHECK-COUNT-26: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>

Expand Down Expand Up @@ -252,22 +254,6 @@ int main() {
const auto R5 = {5ul};
test({RangeDesc{{10}, R5}, RangeDesc{{20}, R5}, RangeDesc{{30}, R5}});

// Two 1-D kernels with different global sizes and a 2-D kernel with more
// work-items and specified (equal) local size.
test({RangeDesc{{10}, R2}, RangeDesc{{20}, R2}, RangeDesc{{10, 10}, {2, 1}}});

// Three 2-D kernels with different global sizes.
test({RangeDesc{{10, 15}, {2, 5}}, RangeDesc{{20, 10}, {2, 5}},
RangeDesc{{10, 5}, {2, 5}}});

// Three 3-D kernels with different global sizes.
test({RangeDesc{{10, 4, 2}, {5, 2, 1}}, RangeDesc{{20, 2, 4}, {5, 2, 1}},
RangeDesc{{10, 2, 4}, {5, 2, 1}}});

// 1-D, 2-D and 3-D kernels with different global sizes.
test({RangeDesc{{10}, R5}, RangeDesc{{10, 1}, {5, 1}},
RangeDesc{{10, 1, 1}, {5, 1, 1}}});

// Test global sizes that trigger the rounded range kernel insertion.
// Note that we lower the RR threshold when running this test.
test({RangeDesc{67}, RangeDesc{87}, RangeDesc{64}});
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/event_wait_cancel.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// Test validity of events after cancel_fusion.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion where one kernel in the fusion list specifies an
// explicit dependency (via events) on another kernel in the fusion list.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include "fusion_event_test_common.h"

#include <sycl/detail/core.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/jit_caching_multitarget.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// REQUIRES: (gpu && (hip || cuda)), cpu
// RUN: %{build} -fsycl-embed-ir -O2 -o %t.out
// RUN: %{build} %{embed-ir} -O2 -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --implicit-check-not "WRONG a VALUE" --implicit-check-not "WRONG b VALUE"

// Test caching for JIT fused kernels when devices with different architectures
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/math_function.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test fusion of a kernel using a math function.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>

#include <sycl/builtins.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/non-kernel-cg.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=2 %{run} %t.out 2>&1 | FileCheck %s

// Test non-kernel device command groups are not fused
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/non_unit_local_size.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion with local internalization specified on the
// accessors, where each work-item processes multiple data-items.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_acc_mem_op.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_buffer_destruction.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_event_wait.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Test fusion cancellation on event::wait() happening before
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_host_accessor.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_host_task.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_queue_destruction.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_queue_wait.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_second_queue.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Windows doesn't yet have full shutdown().
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// For this test, complete_fusion must be supported.
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s

// Test fusion cancellation for requirement between two active fusions.
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/KernelFusion/sync_usm_mem_op.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{build} -o %t.out
// RUN: env SYCL_RT_WARNING_LEVEL=1 %{run} %t.out 2>&1 | FileCheck %s
// Windows doesn't yet have full shutdown().
// UNSUPPORTED: ze_debug && windows
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/usm_no_dependencies.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion using USM pointers.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/work_group_barrier.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion with a combination of kernels that require a work-group
// barrier to be inserted by fusion.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
6 changes: 5 additions & 1 deletion sycl/test-e2e/KernelFusion/wrapped_usm.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
// REQUIRES: aspect-usm_shared_allocations
// RUN: %{build} %{embed-ir} -o %t.out
// RUN: %{run} %t.out
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s

// Test complete fusion using a wrapped USM pointer as kernel functor argument.

// The two kernels are fused, so only a single, fused kernel is launched.
// CHECK-COUNT-1: piEnqueueKernelLaunch
// CHECK-NOT: piEnqueueKernelLaunch

#include <sycl/detail/core.hpp>
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/properties/all_properties.hpp>
Expand Down
Loading