Skip to content

Commit 4dc5cd6

Browse files
committed
PHPLIB-1237 Implement parallel benchmarks
1 parent ec6c431 commit 4dc5cd6

File tree

4 files changed

+202
-0
lines changed

4 files changed

+202
-0
lines changed
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
<?php
2+
3+
namespace MongoDB\Benchmark\DriverBench;
4+
5+
use Generator;
6+
use MongoDB\Benchmark\Fixtures\Data;
7+
use MongoDB\Benchmark\Utils;
8+
use MongoDB\BSON\Document;
9+
use MongoDB\Collection;
10+
use PhpBench\Attributes\AfterClassMethods;
11+
use PhpBench\Attributes\BeforeClassMethods;
12+
use PhpBench\Attributes\BeforeMethods;
13+
use PhpBench\Attributes\ParamProviders;
14+
use PhpBench\Attributes\Revs;
15+
use RuntimeException;
16+
17+
use function array_chunk;
18+
use function array_map;
19+
use function ceil;
20+
use function count;
21+
use function file;
22+
use function file_get_contents;
23+
use function file_put_contents;
24+
use function is_dir;
25+
use function mkdir;
26+
use function pcntl_fork;
27+
use function pcntl_waitpid;
28+
use function range;
29+
use function sprintf;
30+
use function str_repeat;
31+
use function sys_get_temp_dir;
32+
use function unlink;
33+
34+
use const FILE_IGNORE_NEW_LINES;
35+
use const FILE_NO_DEFAULT_CONTEXT;
36+
use const FILE_SKIP_EMPTY_LINES;
37+
38+
/**
39+
* For accurate results, run benchmarks on a standalone server.
40+
*
41+
* @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#parallel
42+
*/
43+
#[BeforeClassMethods('beforeClass')]
44+
#[AfterClassMethods('afterClass')]
45+
final class ParallelBench
46+
{
47+
/** @var string[] */
48+
private static array $files = [];
49+
50+
/**
 * Generates the LDJSON files imported by the benchmarks.
 *
 * Every path returned by getFileNames() is written with the contents of
 * Data::LDJSON_FILE_PATH repeated 5,000 times.
 *
 * @throws RuntimeException if the fixture cannot be read or a file cannot be written
 */
public static function beforeClass(): void
{
    $source = file_get_contents(Data::LDJSON_FILE_PATH);
    if ($source === false) {
        // Without this check, false would be coerced to an empty string and
        // empty files would be generated silently.
        throw new RuntimeException(sprintf('Failed to read fixture file "%s"', Data::LDJSON_FILE_PATH));
    }

    // Generate files: the payload is identical for each of them, so build it once
    $fileContents = str_repeat($source, 5_000);
    foreach (self::getFileNames() as $file) {
        if (file_put_contents($file, $fileContents) === false) {
            throw new RuntimeException(sprintf('Failed to write benchmark file "%s"', $file));
        }
    }
}
58+
59+
/**
 * Deletes every file listed by getFileNames() and empties the static file list.
 */
public static function afterClass(): void
{
    $generated = self::getFileNames();

    foreach ($generated as $path) {
        unlink($path);
    }

    self::$files = [];
}
67+
68+
/**
 * Parallel: LDJSON multi-file import
 * Using single thread
 *
 * Imports every file listed by getFileNames(), one after another, into the
 * collection returned by Utils::getCollection().
 *
 * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
 */
#[BeforeMethods('beforeMultiFileImport')]
#[Revs(1)]
public function benchMultiFileImport(): void
{
    $target = Utils::getCollection();
    $paths = self::getFileNames();

    foreach ($paths as $path) {
        self::importFile($path, $target);
    }
}
83+
84+
/**
 * Parallel: LDJSON multi-file import
 * Using multiple forked processes
 *
 * Each chunk of file names in $params['files'] is imported by its own forked
 * child process. At most $params['processes'] children run at the same time.
 *
 * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#ldjson-multi-file-import
 * @param array{processes:int, files:string[][]} $params
 *
 * @throws RuntimeException if forking or waiting for a child process fails
 */
#[BeforeMethods('beforeMultiFileImport')]
#[ParamProviders(['provideProcessesParameter', 'provideMultiFileImportParameters'])]
#[Revs(1)]
public function benchMultiFileImportFork(array $params): void
{
    $pids = [];
    foreach ($params['files'] as $files) {
        // Wait for a child process to finish if we have reached the maximum number of processes
        if (count($pids) >= $params['processes']) {
            unset($pids[self::waitForChild()]);
        }

        $pid = pcntl_fork();
        if ($pid === -1) {
            throw new RuntimeException('Failed to fork');
        }

        if ($pid === 0) {
            // If we reset, we can guarantee that we get a new manager in the child process
            // If we don't reset, we will get the same manager client_zval in the child process
            // and share the libmongoc client.
            Utils::reset();
            $collection = Utils::getCollection();

            foreach ($files as $file) {
                self::importFile($file, $collection);
            }

            // Exit the child process
            exit(0);
        }

        // Keep the forked process id to wait for it later
        $pids[$pid] = true;
    }

    // Wait for all child processes to finish
    while ($pids !== []) {
        unset($pids[self::waitForChild()]);
    }
}

/**
 * Blocks until any child process terminates and returns its process ID.
 *
 * A wait interrupted by a signal (EINTR) is retried. Any other failure is
 * reported as an exception: ignoring it would leave the caller's list of
 * pending children unchanged and make its final wait loop spin forever.
 *
 * @throws RuntimeException if pcntl_waitpid() fails for a reason other than EINTR
 */
private static function waitForChild(): int
{
    // The error helpers are fully qualified so that this method does not
    // depend on additional "use function" / "use const" statements.
    do {
        $pid = pcntl_waitpid(-1, $status);
        $errno = $pid === -1 ? \pcntl_get_last_error() : 0;
    } while ($pid === -1 && $errno === \PCNTL_EINTR);

    if ($pid === -1) {
        throw new RuntimeException(sprintf('Failed to wait for a child process: %s', \pcntl_strerror($errno)));
    }

    return $pid;
}
134+
135+
/**
 * Provides the maximum number of forked processes.
 *
 * The value starts at 1 and is multiplied by 1.25 (rounded up) after each
 * data set, for as long as it does not exceed 30.
 *
 * @return Generator<string, array{processes:int}>
 */
public static function provideProcessesParameter(): Generator
{
    $processes = 1;

    while ($processes <= 30) {
        yield $processes . 'fork' => ['processes' => $processes];

        $processes = (int) ceil($processes * 1.25);
    }
}
142+
143+
/**
 * Provides the file names split into chunks of 1, 4, 7 and 10 entries.
 *
 * Each chunk is the list of files handled by one process.
 *
 * @return Generator<string, array{files:string[][]}>
 */
public static function provideMultiFileImportParameters(): Generator
{
    $names = self::getFileNames();

    foreach (range(1, 10, 3) as $chunkSize) {
        yield 'by ' . $chunkSize => ['files' => array_chunk($names, $chunkSize)];
    }
}
152+
153+
/**
 * Drops the benchmark database, then creates the collection named by
 * Utils::getCollectionName() in it.
 */
public function beforeMultiFileImport(): void
{
    $db = Utils::getDatabase();
    $db->drop();

    $collectionName = Utils::getCollectionName();
    $db->createCollection($collectionName);
}
159+
160+
/**
 * Deletes the files tracked in self::$files and empties the list.
 *
 * NOTE(review): nothing visible in this class registers this method as a
 * hook or adds entries to self::$files; confirm whether it is still needed.
 */
public function afterMultiFileImport(): void
{
    foreach (self::$files as $file) {
        unlink($file);
    }

    // $files is a static property, so it has to be reset through self::.
    // unset($this->files) only raises an "Accessing static property ... as
    // non static" notice and leaves the list untouched.
    self::$files = [];
}
168+
169+
/**
 * Imports one LDJSON file into the collection with a single bulk insert.
 *
 * Each non-empty line of the file is converted to a BSON document.
 *
 * @throws RuntimeException if the file cannot be read
 */
private static function importFile(string $file, Collection $collection): void
{
    $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES | FILE_NO_DEFAULT_CONTEXT);
    if ($lines === false) {
        // Report the unreadable file by name instead of passing false on to array_map()
        throw new RuntimeException(sprintf('Failed to read benchmark file "%s"', $file));
    }

    // Read file contents into BSON documents
    $docs = array_map(
        static fn (string $line) => Document::fromJSON($line),
        $lines,
    );

    // Insert documents in bulk
    $collection->insertMany($docs);
}
179+
180+
/**
 * Returns the paths of the benchmark files, creating their directory
 * inside the system temporary directory when it does not exist yet.
 *
 * @return string[]
 *
 * @throws RuntimeException if the directory cannot be created
 */
private static function getFileNames(): array
{
    $tempDir = sys_get_temp_dir() . '/mongodb-php-benchmark';

    // is_dir() is checked again after a failed mkdir(): another process may
    // have created the directory between the first check and the call.
    if (! is_dir($tempDir) && ! mkdir($tempDir) && ! is_dir($tempDir)) {
        throw new RuntimeException(sprintf('Failed to create directory "%s"', $tempDir));
    }

    return array_map(
        static fn (int $i) => sprintf('%s/%03d.txt', $tempDir, $i),
        // NOTE(review): only 6 files (000-005) are listed; a range of 0-99
        // was left commented out here. Confirm the intended file count.
        range(0, 5),
    );
}
193+
}

benchmark/Fixtures/Data.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ final class Data
1616
public const LARGE_FILE_PATH = __DIR__ . '/data/large_doc.json';
1717
public const SMALL_FILE_PATH = __DIR__ . '/data/small_doc.json';
1818
public const TWEET_FILE_PATH = __DIR__ . '/data/tweet.json';
19+
public const LDJSON_FILE_PATH = __DIR__ . '/data/ldjson.json';
1920

2021
public static function readJsonFile(string $path): array
2122
{

benchmark/Fixtures/data/ldjson.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"text":"@wildfits you're not getting one.....","in_reply_to_status_id":22773233453,"retweet_count":null,"contributors":null,"created_at":"Thu Sep 02 19:38:18 +0000 2010","geo":null,"source":"web","coordinates":null,"in_reply_to_screen_name":"wildfits","truncated":false,"entities":{"user_mentions":[{"indices":[0,9],"screen_name":"wildfits","name":"Mairin Goetzinger","id":41832464}],"urls":[],"hashtags":[]},"retweeted":false,"place":null,"user":{"friends_count":179,"profile_sidebar_fill_color":"7a7a7a","location":"Minneapols, MN/Brookings SD","verified":false,"follow_request_sent":null,"favourites_count":0,"profile_sidebar_border_color":"a3a3a3","profile_image_url":"http://a1.twimg.com/profile_images/1110614677/Screen_shot_2010-08-25_at_10.12.40_AM_normal.png","geo_enabled":false,"created_at":"Sun Aug 17 00:23:13 +0000 2008","description":"graphic designer + foodie, with a love of music, movies, running, design, + the outdoors!","time_zone":"Mountain Time (US & Canada)","url":"http://jessiefarris.com/","screen_name":"jessiekf","notifications":null,"profile_background_color":"303030","listed_count":1,"lang":"en"}}

benchmark/Utils.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,11 @@ public static function getCollectionName(): string
4343
{
4444
return 'perftest';
4545
}
46+
47+
/**
 * Sets the static client, database and collection properties back to null.
 */
public static function reset(): void
{
    self::$collection = null;
    self::$database = null;
    self::$client = null;
}
4653
}

0 commit comments

Comments
 (0)