Skip to content

Commit 626643a

Browse files
[SYCL] Optimize sycl::detail::memcpy (#13751)
Fixes #7901.
1 parent 0300ac9 commit 626643a

File tree

4 files changed

+134
-227
lines changed

4 files changed

+134
-227
lines changed

sycl/include/sycl/detail/memcpy.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,17 @@
88

99
#pragma once
1010

11-
#include <cstddef>
11+
#include <cstring>
1212

1313
namespace sycl {
1414
inline namespace _V1 {
1515
namespace detail {
1616
inline void memcpy(void *Dst, const void *Src, size_t Size) {
17-
char *Destination = reinterpret_cast<char *>(Dst);
18-
const char *Source = reinterpret_cast<const char *>(Src);
19-
for (size_t I = 0; I < Size; ++I) {
20-
Destination[I] = Source[I];
21-
}
17+
#ifdef __SYCL_DEVICE_ONLY__
18+
__builtin_memcpy(Dst, Src, Size);
19+
#else
20+
std::memcpy(Dst, Src, Size);
21+
#endif
2222
}
2323
} // namespace detail
2424
} // namespace _V1

sycl/test/.clang-format

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
BasedOnStyle: LLVM
2-
CommentPragmas: "RUN|FAIL|REQUIRES|UNSUPPORTED|CHECK|expected-"
2+
CommentPragmas: "RUN|FAIL|REQUIRES|UNSUPPORTED|CHECK|expected-|update_cc_test_checks.py"

sycl/test/check_device_code/vector/vector_as.cpp

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,36 +19,12 @@ template SYCL_EXTERNAL sycl::vec<int, 4> sycl::vec<float, 4>::as<sycl::vec<int,
1919
// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZNK4sycl3_V13vecIfLi4EE2asINS1_IiLi4EEEEET_v(
2020
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] {
2121
// CHECK-NEXT: entry:
22-
// CHECK-NEXT: br label [[FOR_COND_I:%.*]]
23-
// CHECK: for.cond.i:
24-
// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
25-
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 16
26-
// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAIL6MEMCPYEPVPKVM_EXIT:%.*]]
27-
// CHECK: for.body.i:
28-
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[THIS]], i64 [[I_0_I]]
29-
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA7:![0-9]+]]
30-
// CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]]
31-
// CHECK-NEXT: store i8 [[TMP0]], ptr addrspace(4) [[ARRAYIDX1_I]], align 1, !tbaa [[TBAA7]]
32-
// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1
33-
// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
34-
// CHECK: _ZN4sycl3_V16detail6memcpyEPvPKvm.exit:
22+
// CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false)
3523
// CHECK-NEXT: ret void
3624
//
3725
//
3826
// CHECK-PREVIEW-LABEL: define weak_odr dso_local spir_func void @_ZNK4sycl3_V13vecIfLi4EE2asINS1_IiLi4EEEEET_v(
3927
// CHECK-PREVIEW-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] {
4028
// CHECK-PREVIEW-NEXT: entry:
41-
// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I:%.*]]
42-
// CHECK-PREVIEW: for.cond.i:
43-
// CHECK-PREVIEW-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
44-
// CHECK-PREVIEW-NEXT: [[CMP_I:%.*]] = icmp ult i64 [[I_0_I]], 16
45-
// CHECK-PREVIEW-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAIL6MEMCPYEPVPKVM_EXIT:%.*]]
46-
// CHECK-PREVIEW: for.body.i:
47-
// CHECK-PREVIEW-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[THIS]], i64 [[I_0_I]]
48-
// CHECK-PREVIEW-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I]], align 1, !tbaa [[TBAA7:![0-9]+]]
49-
// CHECK-PREVIEW-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[AGG_RESULT]], i64 [[I_0_I]]
50-
// CHECK-PREVIEW-NEXT: store i8 [[TMP0]], ptr addrspace(4) [[ARRAYIDX1_I]], align 1, !tbaa [[TBAA7]]
51-
// CHECK-PREVIEW-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1
52-
// CHECK-PREVIEW-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
53-
// CHECK-PREVIEW: _ZN4sycl3_V16detail6memcpyEPvPKvm.exit:
29+
// CHECK-PREVIEW-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false)
5430
// CHECK-PREVIEW-NEXT: ret void

0 commit comments

Comments
 (0)