Skip to content

PHPLIB-1236 Implement Multi-Doc Benchmarks #1165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions benchmark/DriverBench/GridFSBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?php

namespace MongoDB\Benchmark\DriverBench;

use MongoDB\Benchmark\Fixtures\Data;
use MongoDB\Benchmark\Utils;
use MongoDB\GridFS\Bucket;
use PhpBench\Attributes\AfterMethods;
use PhpBench\Attributes\BeforeMethods;

use function rewind;

/**
* For accurate results, run benchmarks on a standalone server.
*
* @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#multi-doc-benchmarks
*/
#[AfterMethods('afterAll')]
final class GridFSBench
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extracted GridFS benchmarks into a specific class because they are very different from the other collection benchmarks. They need some properties.

{
/**
 * Stream handed to the bucket by the benchmark subjects: the source passed to
 * uploadFromStream() in benchUpload(), or the destination passed to
 * downloadToStream() in benchDownload().
 *
 * @var resource
 */
private $stream;
// GridFS bucket selected on the benchmark database by the before* methods.
private Bucket $bucket;
// Value returned by uploadFromStream() in beforeDownload(); identifies the file fetched by benchDownload().
private mixed $id;

/**
 * Uploads the stream prepared by beforeUpload() to the GridFS bucket.
 *
 * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#gridfs-upload
 */
#[BeforeMethods('beforeUpload')]
public function benchUpload(): void
{
    // beforeUpload() runs once per iteration, but this subject is executed
    // once per revolution. Without rewinding, the stream is left at EOF by
    // the first upload and every following revolution uploads an empty file.
    rewind($this->stream);

    $this->bucket->uploadFromStream('test', $this->stream);
}

/**
 * Resets the database and prepares the bucket and the payload for benchUpload().
 */
public function beforeUpload(): void
{
    // Start from an empty database
    $db = Utils::getDatabase();
    $db->drop();

    // Init the GridFS bucket by uploading a one-byte file
    $this->bucket = $db->selectGridFSBucket();
    $this->bucket->uploadFromStream('init', Data::getStream(1));

    // Prepare the 50MB stream to upload
    $payloadSize = 50 * 1024 * 1024;
    $this->stream = Data::getStream($payloadSize);
}

/**
 * Downloads the file uploaded by beforeDownload() into the destination stream.
 *
 * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#gridfs-download
 */
#[BeforeMethods('beforeDownload')]
public function benchDownload(): void
{
    // beforeDownload() runs once per iteration, but this subject is executed
    // once per revolution. Rewinding makes each download overwrite the
    // previous one instead of appending another copy of the file to the
    // destination stream on every revolution.
    rewind($this->stream);

    $this->bucket->downloadToStream($this->id, $this->stream);
}

/**
 * Resets the database, stores the file to download and prepares the
 * destination stream for benchDownload().
 */
public function beforeDownload(): void
{
    // Start from an empty database
    $db = Utils::getDatabase();
    $db->drop();

    $this->bucket = $db->selectGridFSBucket();

    // Upload a 50MB file and remember its id
    $source = Data::getStream(50 * 1024 * 1024);
    $this->id = $this->bucket->uploadFromStream('init', $source);

    // Prepare the (initially empty) stream to receive the download
    $this->stream = Data::getStream(0);
}

/**
 * Releases the benchmark state and drops the benchmark database.
 */
public function afterAll(): void
{
    // Drop the references held by this instance
    unset($this->bucket);
    unset($this->stream);
    unset($this->id);

    Utils::getDatabase()->drop();
}
}
99 changes: 99 additions & 0 deletions benchmark/DriverBench/MultiDocBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
<?php

namespace MongoDB\Benchmark\DriverBench;

use Generator;
use MongoDB\Benchmark\Fixtures\Data;
use MongoDB\Benchmark\Utils;
use MongoDB\BSON\Document;
use PhpBench\Attributes\BeforeMethods;
use PhpBench\Attributes\ParamProviders;

use function array_fill;
use function file_get_contents;

/**
 * Multi-document benchmarks: exhausting a cursor and bulk-inserting documents.
 *
 * For accurate results, run benchmarks on a standalone server.
 *
 * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#multi-doc-benchmarks
 */
final class MultiDocBench
{
    /**
     * Finds every document of the collection and iterates the whole cursor,
     * discarding the results.
     *
     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#find-many-and-empty-the-cursor
     * @param array{options: array} $params
     */
    #[BeforeMethods('beforeFindMany')]
    #[ParamProviders('provideFindManyParams')]
    public function benchFindMany(array $params): void
    {
        $collection = Utils::getCollection();

        // phpcs:ignore Generic.CodeAnalysis.EmptyStatement.DetectedForeach
        // phpcs:ignore Generic.ControlStructures.InlineControlStructure.NotAllowed
        foreach ($collection->find([], $params['options']) as $document);
    }

    /**
     * Recreates the collection content read by benchFindMany().
     */
    public function beforeFindMany(): void
    {
        $collection = Utils::getCollection();
        $collection->drop();

        $tweet = Data::readJsonFile(Data::TWEET_FILE_PATH);
        // The second argument of array_fill() is a number of elements, not
        // the last index: 10_000 yields the 10,000 documents of the spec.
        $documents = array_fill(0, 10_000, $tweet);
        $collection->insertMany($documents);
    }

    /**
     * Provides the find() options to benchmark.
     */
    public static function provideFindManyParams(): Generator
    {
        yield 'Driver default typemap' => [
            'options' => [],
        ];

        yield 'Raw BSON' => [
            'options' => ['typeMap' => ['root' => 'bson']],
        ];
    }

    /**
     * Inserts all the provided documents with a single insertMany() call.
     *
     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#small-doc-bulk-insert
     * @see https://github.com/mongodb/specifications/blob/ddfc8b583d49aaf8c4c19fa01255afb66b36b92e/source/benchmarking/benchmarking.rst#large-doc-bulk-insert
     * @param array{documents: array} $params
     */
    #[BeforeMethods('beforeBulkInsert')]
    #[ParamProviders('provideBulkInsertParams')]
    public function benchBulkInsert(array $params): void
    {
        $collection = Utils::getCollection();

        $collection->insertMany($params['documents']);
    }

    /**
     * Drops and recreates the collection targeted by benchBulkInsert().
     */
    public function beforeBulkInsert(): void
    {
        $database = Utils::getDatabase();
        $database->dropCollection(Utils::getCollectionName());
        $database->createCollection(Utils::getCollectionName());
    }

    /**
     * Provides the document sets to insert, as PHP arrays and as BSON documents.
     *
     * The second argument of array_fill() is a number of elements: the spec
     * asks for 10,000 small documents and 10 large documents.
     */
    public static function provideBulkInsertParams(): Generator
    {
        yield 'Small doc' => [
            'documents' => array_fill(0, 10_000, Data::readJsonFile(Data::SMALL_FILE_PATH)),
        ];

        yield 'Small BSON doc' => [
            'documents' => array_fill(0, 10_000, Document::fromJSON(file_get_contents(Data::SMALL_FILE_PATH))),
        ];

        yield 'Large doc' => [
            'documents' => array_fill(0, 10, Data::readJsonFile(Data::LARGE_FILE_PATH)),
        ];

        yield 'Large BSON doc' => [
            'documents' => array_fill(0, 10, Document::fromJSON(file_get_contents(Data::LARGE_FILE_PATH))),
        ];
    }
}
2 changes: 1 addition & 1 deletion benchmark/Extension/EnvironmentProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ private function getServerInfo(Manager $manager): array
private function getBuildInfo(Manager $manager): array
{
$buildInfo = $manager->executeCommand(
Utils::getDatabase(),
Utils::getDatabaseName(),
new Command(['buildInfo' => 1]),
new ReadPreference(ReadPreference::PRIMARY),
)->toArray()[0];
Expand Down
18 changes: 18 additions & 0 deletions benchmark/Fixtures/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
namespace MongoDB\Benchmark\Fixtures;

use function file_get_contents;
use function fopen;
use function fwrite;
use function json_decode;
use function rewind;
use function str_repeat;

use const JSON_THROW_ON_ERROR;

Expand All @@ -17,4 +21,18 @@ public static function readJsonFile(string $path): array
{
return json_decode(file_get_contents($path), true, 512, JSON_THROW_ON_ERROR);
}

/**
* Generates an in-memory stream of the given size.
*
* @return resource
*/
public static function getStream(int $size)
{
$stream = fopen('php://memory', 'w+');
fwrite($stream, str_repeat("\0", $size));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dataset, designated GRIDFS_LARGE (disk file 'gridfs_large.bin'), consists of a single file containing about 50 MB of random data.

I don't need to commit a 50MB file full of NULL characters from the spec. I can generate it on-demand in-memory.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, thank you!

rewind($stream);

return $stream;
}
}
22 changes: 12 additions & 10 deletions benchmark/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,43 @@

use MongoDB\Client;
use MongoDB\Collection;
use MongoDB\Database;

use function getenv;

final class Utils
{
private static ?Client $client;
private static ?Database $database;
private static ?Collection $collection;

public static function getClient(): Client
{
return self::$client ??= self::createClient();
return self::$client ??= new Client(self::getUri());
}

public static function getDatabase(): Database
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored to give access to an instance of each library object, and cache them.

{
return self::$database ??= self::getClient()->selectDatabase(self::getDatabaseName());
}

public static function getCollection(): Collection
{
return self::$collection ??= self::createCollection();
return self::$collection ??= self::getDatabase()->selectCollection(self::getCollectionName());
}

public static function getUri(): string
{
return getenv('MONGODB_URI') ?: 'mongodb://localhost:27017/';
}

public static function getDatabase(): string
public static function getDatabaseName(): string
{
return getenv('MONGODB_DATABASE') ?: 'phplib_test';
}

private static function createClient(): Client
{
return new Client(self::getUri());
}

private static function createCollection(): Collection
public static function getCollectionName(): string
{
return self::getClient()->selectCollection(self::getDatabase(), 'perftest');
return 'perftest';
}
}
1 change: 1 addition & 0 deletions phpbench.json.dist
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"runner.bootstrap": "vendor/autoload.php",
"runner.file_pattern": "*Bench.php",
"runner.path": "benchmark",
"runner.php_config": { "memory_limit": "1G" },
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Downloading a 50MB file in-memory takes a lot more memory than expected.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need to profile why this is taking so much memory.

Copy link
Member Author

@GromNaN GromNaN Sep 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Profiling result: it's the substr that uses the most memory.

while (strlen($data) < $length) {
if ($this->bufferOffset >= strlen($this->buffer) && ! $this->initBufferFromNextChunk()) {
break;
}
$initialDataLength = strlen($data);
$data .= substr($this->buffer, $this->bufferOffset, $length - $initialDataLength);
$this->bufferOffset += strlen($data) - $initialDataLength;
}

image

"runner.iterations": 3,
"runner.revs": 10
}