Skip to content

Commit 19140b1

Browse files
committed
Add new benchmarks
1 parent 508162e commit 19140b1

File tree

5 files changed

+772
-10
lines changed

5 files changed

+772
-10
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ endif()
313313
# compiler is required. Moreover, if these options are not set, CMake will set
314314
# up a strict C build, without C++ support.
315315
set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT"
316-
"UMF_BUILD_BENCHMARKS_MT")
316+
"UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS")
317317
foreach(option_name ${OPTIONS_REQUIRING_CXX})
318318
if(${option_name})
319319
enable_language(CXX)

benchmark/CMakeLists.txt

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,24 @@
1-
# Copyright (C) 2023 Intel Corporation
1+
# Copyright (C) 2023-2024 Intel Corporation
22
# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
33
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
44

5+
include(FetchContent)
6+
FetchContent_Declare(
7+
googlebenchmark
8+
GIT_REPOSITORY https://github.com/google/benchmark.git
9+
GIT_TAG v1.9.0)
10+
11+
set(BENCHMARK_ENABLE_GTEST_TESTS
12+
OFF
13+
CACHE BOOL "" FORCE)
14+
set(BENCHMARK_ENABLE_TESTING
15+
OFF
16+
CACHE BOOL "" FORCE)
17+
set(BENCHMARK_ENABLE_INSTALL
18+
OFF
19+
CACHE BOOL "" FORCE)
20+
FetchContent_MakeAvailable(googlebenchmark)
21+
522
# In MSVC builds, there is no way to determine the actual build type during the
623
# CMake configuration step. Therefore, this message is printed in all MSVC
724
# builds.
@@ -32,7 +49,7 @@ function(add_umf_benchmark)
3249
"${multiValueArgs}"
3350
${ARGN})
3451

35-
set(BENCH_NAME umf-bench-${ARG_NAME})
52+
set(BENCH_NAME umf-${ARG_NAME})
3653

3754
set(BENCH_LIBS ${ARG_LIBS} umf)
3855

@@ -55,13 +72,17 @@ function(add_umf_benchmark)
5572
COMMAND ${BENCH_NAME}
5673
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
5774

58-
# Benchmark passes if it prints "PASSED" in the output, because ubench of
59-
# scalable pool fails if the confidence interval exceeds maximum permitted
60-
# 2.5%.
61-
set_tests_properties(
62-
${BENCH_NAME} PROPERTIES
63-
LABELS "benchmark"
64-
PASS_REGULAR_EXPRESSION "PASSED")
75+
if("${BENCH_NAME}" STREQUAL "umf-ubench")
76+
# Benchmark passes if it prints "PASSED" in the output, because ubench
77+
# of scalable pool fails if the confidence interval exceeds maximum
78+
# permitted 2.5%.
79+
set_tests_properties(
80+
${BENCH_NAME} PROPERTIES
81+
LABELS "benchmark"
82+
PASS_REGULAR_EXPRESSION "PASSED")
83+
else()
84+
set_tests_properties(${BENCH_NAME} PROPERTIES LABELS "benchmark")
85+
endif()
6586

6687
if(WINDOWS)
6788
# append PATH to DLLs
@@ -120,6 +141,12 @@ add_umf_benchmark(
120141
LIBS ${LIBS_OPTIONAL}
121142
LIBDIRS ${LIB_DIRS})
122143

144+
add_umf_benchmark(
145+
NAME benchmark
146+
SRCS benchmark.cpp
147+
LIBS ${LIBS_OPTIONAL} benchmark::benchmark
148+
LIBDIRS ${LIB_DIRS})
149+
123150
if(UMF_BUILD_BENCHMARKS_MT)
124151
add_umf_benchmark(
125152
NAME multithreaded

benchmark/benchmark.cpp

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
/*
2+
* Copyright (C) 2024 Intel Corporation
3+
*
4+
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*/
8+
9+
#include <benchmark/benchmark.h>
10+
#include <umf/pools/pool_proxy.h>
11+
#include <umf/pools/pool_scalable.h>
12+
#include <umf/providers/provider_level_zero.h>
13+
#include <umf/providers/provider_os_memory.h>
14+
15+
#ifdef UMF_BUILD_LIBUMF_POOL_DISJOINT
16+
#include <umf/pools/pool_disjoint.h>
17+
#endif
18+
19+
#ifdef UMF_BUILD_LIBUMF_POOL_JEMALLOC
20+
#include <umf/pools/pool_jemalloc.h>
21+
#endif
22+
23+
#include "benchmark.hpp"
24+
25+
struct glibc_malloc : public allocator_interface {
26+
unsigned SetUp([[maybe_unused]] ::benchmark::State &state,
27+
unsigned r) override {
28+
return r;
29+
}
30+
void TearDown([[maybe_unused]] ::benchmark::State &state) override {};
31+
void *benchAlloc(size_t size) override { return malloc(size); }
32+
void benchFree(void *ptr, [[maybe_unused]] size_t size) override {
33+
free(ptr);
34+
}
35+
static std::string name() { return "glibc"; }
36+
};
37+
38+
struct os_provider : public provider_interface {
39+
umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault();
40+
void *getParams() override { return &params; }
41+
umf_memory_provider_ops_t *getOps() override {
42+
return umfOsMemoryProviderOps();
43+
}
44+
static std::string name() { return "os_provider"; }
45+
};
46+
47+
template <typename Provider>
48+
struct proxy_pool : public pool_interface<Provider> {
49+
umf_memory_pool_ops_t *
50+
getOps([[maybe_unused]] ::benchmark::State &state) override {
51+
return umfProxyPoolOps();
52+
}
53+
void *getParams([[maybe_unused]] ::benchmark::State &state) override {
54+
return nullptr;
55+
}
56+
static std::string name() { return "proxy_pool<" + Provider::name() + ">"; }
57+
};
58+
59+
#ifdef UMF_BUILD_LIBUMF_POOL_DISJOINT
60+
template <typename Provider>
61+
struct disjoint_pool : public pool_interface<Provider> {
62+
umf_disjoint_pool_params_t disjoint_memory_pool_params;
63+
umf_memory_pool_ops_t *
64+
getOps([[maybe_unused]] ::benchmark::State &state) override {
65+
return umfDisjointPoolOps();
66+
}
67+
void *getParams([[maybe_unused]] ::benchmark::State &state) override {
68+
disjoint_memory_pool_params = umfDisjointPoolParamsDefault();
69+
disjoint_memory_pool_params.SlabMinSize = 4096;
70+
disjoint_memory_pool_params.MaxPoolableSize = 4096 * 16;
71+
disjoint_memory_pool_params.Capacity = 4;
72+
73+
disjoint_memory_pool_params.MinBucketSize = 4096;
74+
return &disjoint_memory_pool_params;
75+
}
76+
static std::string name() {
77+
return "disjoint_pool<" + Provider::name() + ">";
78+
}
79+
};
80+
#endif
81+
82+
#ifdef UMF_BUILD_LIBUMF_POOL_JEMALLOC
83+
template <typename Provider>
84+
struct jemalloc_pool : public pool_interface<Provider> {
85+
umf_memory_pool_ops_t *
86+
getOps([[maybe_unused]] ::benchmark::State &state) override {
87+
return umfJemallocPoolOps();
88+
}
89+
void *getParams([[maybe_unused]] ::benchmark::State &state) override {
90+
return NULL;
91+
}
92+
static std::string name() {
93+
return "jemalloc_pool<" + Provider::name() + ">";
94+
}
95+
};
96+
#endif
97+
98+
template <typename Provider>
99+
struct scalable_pool : public pool_interface<Provider> {
100+
virtual umf_memory_pool_ops_t *
101+
getOps([[maybe_unused]] ::benchmark::State &state) override {
102+
return umfScalablePoolOps();
103+
}
104+
virtual void *
105+
getParams([[maybe_unused]] ::benchmark::State &state) override {
106+
return NULL;
107+
}
108+
static std::string name() {
109+
return "scalable_pool<" + Provider::name() + ">";
110+
}
111+
};
112+
113+
// Benchmark scenarios:
114+
115+
// alloc_benchmark / glibc_fix: instantiated with fix_alloc_size and
// glibc_malloc. Registers two argument tuples; in Google Benchmark each
// Threads(n) call adds a thread count to run with and MinTime(1) sets the
// minimum run time in seconds.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fix_alloc_size,
116+
glibc_malloc);
117+
118+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_fix)
119+
->Args({10000, 0, 4096})
120+
->Args({10000, 100000, 4096})
121+
->Threads(4)
122+
->Threads(1)
123+
->MinTime(1);
124+
125+
// alloc_benchmark / glibc_uniform: instantiated with uniform_alloc_size and
// glibc_malloc. Registers two five-field argument tuples, run with 4 threads
// and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_uniform,
126+
uniform_alloc_size, glibc_malloc);
127+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_uniform)
128+
->Args({10000, 0, 8, 64 * 1024, 8})
129+
->Args({10000, 100000, 8, 64 * 1024, 8})
130+
->Threads(4)
131+
->Threads(1);
132+
133+
// alloc_benchmark / proxy_pool: instantiated with fix_alloc_size and
// pool_allocator<proxy_pool<os_provider>>. Registers two argument tuples,
// run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fix_alloc_size,
134+
pool_allocator<proxy_pool<os_provider>>);
135+
136+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool)
137+
->Args({10000, 0, 4096})
138+
->Args({10000, 100000, 4096})
139+
->Threads(4)
140+
->Threads(1);
141+
142+
// alloc_benchmark / os_provider: instantiated with fix_alloc_size and
// provider_allocator<os_provider> (a provider-level allocator rather than a
// pool_allocator). Registers two argument tuples, run with 4 threads and
// with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, os_provider, fix_alloc_size,
143+
provider_allocator<os_provider>);
144+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, os_provider)
145+
->Args({10000, 0, 4096})
146+
->Args({10000, 100000, 4096})
147+
->Threads(4)
148+
->Threads(1);
149+
150+
#ifdef UMF_BUILD_LIBUMF_POOL_DISJOINT
151+
// alloc_benchmark / disjoint_pool_fix: instantiated with fix_alloc_size and
// pool_allocator<disjoint_pool<os_provider>>. Registers two argument tuples,
// run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix,
152+
fix_alloc_size,
153+
pool_allocator<disjoint_pool<os_provider>>);
154+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix)
155+
->Args({10000, 0, 4096})
156+
->Args({10000, 100000, 4096})
157+
->Threads(4)
158+
->Threads(1);
159+
160+
// TODO: this scenario is disabled because the disjoint pool crashes when run with it.
161+
/*UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_uniform,
162+
uniform_alloc_size,
163+
pool_allocator<disjoint_pool<os_provider>>);
164+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform)
165+
->Args({10000, 0, 8, 64 * 1024, 8})
166+
->Args({10000, 100000, 8, 64 * 1024, 8})
167+
->Threads(4)
168+
->Threads(1);
169+
*/
170+
#endif
171+
172+
#ifdef UMF_BUILD_LIBUMF_POOL_JEMALLOC
173+
// alloc_benchmark / jemalloc_pool_fix: instantiated with fix_alloc_size and
// pool_allocator<jemalloc_pool<os_provider>>. Registers two argument tuples,
// run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix,
174+
fix_alloc_size,
175+
pool_allocator<jemalloc_pool<os_provider>>);
176+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_fix)
177+
->Args({10000, 0, 4096})
178+
->Args({10000, 100000, 4096})
179+
->Threads(4)
180+
->Threads(1);
181+
182+
// alloc_benchmark / jemalloc_pool_uniform: instantiated with
// uniform_alloc_size and pool_allocator<jemalloc_pool<os_provider>>.
// Registers two five-field argument tuples, run with 4 threads and with
// 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_uniform,
183+
uniform_alloc_size,
184+
pool_allocator<jemalloc_pool<os_provider>>);
185+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_uniform)
186+
->Args({10000, 0, 8, 64 * 1024, 8})
187+
->Args({10000, 100000, 8, 64 * 1024, 8})
188+
->Threads(4)
189+
->Threads(1);
190+
191+
#endif
192+
193+
// alloc_benchmark / scalable_pool_fix: instantiated with fix_alloc_size and
// pool_allocator<scalable_pool<os_provider>>. Registers two argument tuples,
// run with 1 thread only.
// NOTE(review): the 4-thread variant is commented out below and SOURCE gives
// no reason — confirm whether it is a known failure or an oversight.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_fix,
194+
fix_alloc_size,
195+
pool_allocator<scalable_pool<os_provider>>);
196+
197+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_fix)
198+
->Args({10000, 0, 4096})
199+
->Args({10000, 100000, 4096})
200+
// ->Threads(4)
201+
->Threads(1);
202+
203+
// alloc_benchmark / scalable_pool_uniform: instantiated with
// uniform_alloc_size and pool_allocator<scalable_pool<os_provider>>.
// Registers two five-field argument tuples, run with 1 thread only.
// NOTE(review): the 4-thread variant is commented out below and SOURCE gives
// no reason — confirm whether it is a known failure or an oversight.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_uniform,
204+
uniform_alloc_size,
205+
pool_allocator<scalable_pool<os_provider>>);
206+
207+
UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_uniform)
208+
->Args({10000, 0, 8, 64 * 1024, 8})
209+
->Args({10000, 100000, 8, 64 * 1024, 8})
210+
// ->Threads(4)
211+
->Threads(1);
212+
213+
// Multiple allocations/frees scenarios (multiple_malloc_free_benchmark):
214+
215+
// multiple_malloc_free_benchmark / glibc_fix: instantiated with
// fix_alloc_size and glibc_malloc. Registers two argument tuples, run with
// 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_fix,
216+
fix_alloc_size, glibc_malloc);
217+
218+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_fix)
219+
->Args({10000, 0, 4096})
220+
->Args({10000, 100000, 4096})
221+
->Threads(4)
222+
->Threads(1);
223+
224+
// multiple_malloc_free_benchmark / glibc_uniform: instantiated with
// uniform_alloc_size and glibc_malloc. Registers two five-field argument
// tuples, run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_uniform,
225+
uniform_alloc_size, glibc_malloc);
226+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_uniform)
227+
->Args({10000, 0, 8, 64 * 1024, 8})
228+
->Args({10000, 100000, 8, 64 * 1024, 8})
229+
->Threads(4)
230+
->Threads(1);
231+
232+
// multiple_malloc_free_benchmark / proxy_pool: instantiated with
// fix_alloc_size and pool_allocator<proxy_pool<os_provider>>. Registers two
// argument tuples, run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool,
233+
fix_alloc_size,
234+
pool_allocator<proxy_pool<os_provider>>);
235+
236+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool)
237+
->Args({10000, 0, 4096})
238+
->Args({10000, 100000, 4096})
239+
->Threads(4)
240+
->Threads(1);
241+
242+
// multiple_malloc_free_benchmark / os_provider: instantiated with
// fix_alloc_size and provider_allocator<os_provider> (a provider-level
// allocator rather than a pool_allocator). Registers two argument tuples,
// run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider,
243+
fix_alloc_size, provider_allocator<os_provider>);
244+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider)
245+
->Args({10000, 0, 4096})
246+
->Args({10000, 100000, 4096})
247+
->Threads(4)
248+
->Threads(1);
249+
250+
#ifdef UMF_BUILD_LIBUMF_POOL_DISJOINT
251+
// multiple_malloc_free_benchmark / disjoint_pool_fix: instantiated with
// fix_alloc_size and pool_allocator<disjoint_pool<os_provider>>. Registers
// two argument tuples, run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix,
252+
fix_alloc_size,
253+
pool_allocator<disjoint_pool<os_provider>>);
254+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix)
255+
->Args({10000, 0, 4096})
256+
->Args({10000, 100000, 4096})
257+
->Threads(4)
258+
->Threads(1);
259+
260+
// TODO: this scenario is disabled because the disjoint pool crashes when run with it.
261+
/*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform,
262+
uniform_alloc_size,
263+
pool_allocator<disjoint_pool<os_provider>>);
264+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform)
265+
->Args({10000, 0, 8, 64 * 1024, 8})
266+
->Args({10000, 100000, 8, 64 * 1024, 8})
267+
->Threads(4)
268+
->Threads(1);
269+
*/
270+
#endif
271+
272+
#ifdef UMF_BUILD_LIBUMF_POOL_JEMALLOC
273+
// multiple_malloc_free_benchmark / jemalloc_pool_fix: instantiated with
// fix_alloc_size and pool_allocator<jemalloc_pool<os_provider>>. Registers
// two argument tuples, run with 4 threads and with 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix,
274+
fix_alloc_size,
275+
pool_allocator<jemalloc_pool<os_provider>>);
276+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix)
277+
->Args({10000, 0, 4096})
278+
->Args({10000, 100000, 4096})
279+
->Threads(4)
280+
->Threads(1);
281+
282+
// multiple_malloc_free_benchmark / jemalloc_pool_uniform: instantiated with
// uniform_alloc_size and pool_allocator<jemalloc_pool<os_provider>>.
// Registers two five-field argument tuples, run with 4 threads and with
// 1 thread.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark,
283+
jemalloc_pool_uniform, uniform_alloc_size,
284+
pool_allocator<jemalloc_pool<os_provider>>);
285+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform)
286+
->Args({10000, 0, 8, 64 * 1024, 8})
287+
->Args({10000, 100000, 8, 64 * 1024, 8})
288+
->Threads(4)
289+
->Threads(1);
290+
291+
#endif
292+
293+
// multiple_malloc_free_benchmark / scalable_pool_fix: instantiated with
// fix_alloc_size and pool_allocator<scalable_pool<os_provider>>. Registers
// two argument tuples, run with 1 thread only.
// NOTE(review): the 4-thread variant is commented out below and SOURCE gives
// no reason — confirm whether it is a known failure or an oversight.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_fix,
294+
fix_alloc_size,
295+
pool_allocator<scalable_pool<os_provider>>);
296+
297+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_fix)
298+
->Args({10000, 0, 4096})
299+
->Args({10000, 100000, 4096})
300+
// ->Threads(4)
301+
->Threads(1);
302+
303+
// multiple_malloc_free_benchmark / scalable_pool_uniform: instantiated with
// uniform_alloc_size and pool_allocator<scalable_pool<os_provider>>.
// Registers two five-field argument tuples, run with 1 thread only.
// NOTE(review): the 4-thread variant is commented out below and SOURCE gives
// no reason — confirm whether it is a known failure or an oversight.
// NOTE(review): the meaning of each Args field is defined in benchmark.hpp,
// which is not visible here — confirm there.
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark,
304+
scalable_pool_uniform, uniform_alloc_size,
305+
pool_allocator<scalable_pool<os_provider>>);
306+
307+
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_uniform)
308+
->Args({10000, 0, 8, 64 * 1024, 8})
309+
->Args({10000, 100000, 8, 64 * 1024, 8})
310+
// ->Threads(4)
311+
->Threads(1);
312+
313+
// Google Benchmark macro that defines main() for this executable; it
// initialises the library and runs the benchmarks registered above.
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)