
perf(NODE-4726): bench many strings in large batch in parallel #516


Merged 2 commits on Oct 26, 2022
2 changes: 2 additions & 0 deletions .gitignore
@@ -26,3 +26,5 @@ coverage/
docs/public

*.0x
*.cpuprofile
*.heapprofile
81 changes: 55 additions & 26 deletions etc/benchmarks/lib_runner.mjs
@@ -1,10 +1,13 @@
/* eslint-disable @typescript-eslint/explicit-module-boundary-types */
/* eslint-disable @typescript-eslint/no-var-requires */
import { performance } from 'perf_hooks';
import { createHistogram } from 'perf_hooks';
import { readFile } from 'fs/promises';
import { cpus, totalmem } from 'os';
import { exec as execCb } from 'child_process';
import { promisify } from 'util';
import { promisify, types } from 'util';
import { writeFile } from 'fs/promises';
import v8Profiler from 'v8-profiler-next';
import chalk from 'chalk';
const exec = promisify(execCb);

const hw = cpus();
@@ -23,27 +26,26 @@ export const systemInfo = iterations =>
export const readJSONFile = async path =>
JSON.parse(await readFile(new URL(path, import.meta.url), { encoding: 'utf8' }));

function average(array) {
let sum = 0;
for (const value of array) sum += value;
return sum / array.length;
}

function testPerformance(lib, [fn, arg], iterations) {
let measurements = [];
async function testPerformance(lib, [fn, arg], iterations) {
let thrownError = null;
const histogram = createHistogram();
for (let i = 0; i < iterations; i++) {
const start = performance.now();
try {
fn(i, lib, arg);
if (types.isAsyncFunction(fn)) {
histogram.recordDelta();
await fn(i, lib, arg);
histogram.recordDelta();
} else {
histogram.recordDelta();
fn(i, lib, arg);
histogram.recordDelta();
}
} catch (error) {
thrownError = error;
break;
}
const end = performance.now();
measurements.push(end - start);
}
return { result: average(measurements).toFixed(8), thrownError };
return { histogram, thrownError };
}

export function getCurrentLocalBSON(libs) {
@@ -73,12 +75,14 @@ export async function getLibs() {
lib: { ...legacyBSON, ...legacyBSON.prototype },
version: (await readJSONFile('../../node_modules/bson_legacy/package.json')).version
};
})(),
(async () => ({
name: 'bson-ext',
lib: await import('../../node_modules/bson_ext/lib/index.js'),
version: (await readJSONFile('../../node_modules/bson_ext/package.json')).version
}))()
})()
// BSON-EXT is EOL, so we no longer need to keep testing it, and it currently has issues
// installing in this no-save way on M1 that are not worth fixing.
// (async () => ({
// name: 'bson-ext',
// lib: await import('../../node_modules/bson_ext/lib/index.js'),
// version: (await readJSONFile('../../node_modules/bson_ext/package.json')).version
// }))()
]).catch(error => {
console.error(error);
console.error(
@@ -91,6 +95,27 @@ export async function getLibs() {
});
}

const printHistogram = (name, h) => {
const makeReadableTime = nanoseconds => (nanoseconds / 1e6).toFixed(3).padStart(7, ' ');
console.log();
console.log(chalk.green(name));
console.log('-'.repeat(155));
process.stdout.write(`| ${chalk.cyan('max')}: ${chalk.red(makeReadableTime(h.max))} ms |`);
process.stdout.write(` ${chalk.cyan('min')}: ${chalk.red(makeReadableTime(h.min))} ms |`);
process.stdout.write(` ${chalk.cyan('mean')}: ${chalk.red(makeReadableTime(h.mean))} ms |`);
process.stdout.write(` ${chalk.cyan('stddev')}: ${chalk.red(makeReadableTime(h.stddev))} ms |`);
process.stdout.write(
` ${chalk.cyan('p90th')}: ${chalk.red(makeReadableTime(h.percentile(90)))} ms |`
);
process.stdout.write(
` ${chalk.cyan('p95th')}: ${chalk.red(makeReadableTime(h.percentile(95)))} ms |`
);
process.stdout.write(
` ${chalk.cyan('p99th')}: ${chalk.red(makeReadableTime(h.percentile(99)))} ms |`
);
console.log('\n' + '-'.repeat(155));
};

/**
* ```ts
* interface {
@@ -109,19 +134,23 @@ export async function runner({ iterations, setup, name, run, skip }) {
const BSONLibs = await getLibs();
const setupResult = setup?.(BSONLibs) ?? null;

console.log(`\ntesting: ${name}`);
console.log('-'.repeat(155));

for (const bson of BSONLibs) {
const { result: perf, thrownError } = testPerformance(bson, [run, setupResult], iterations);
const profileName = `${bson.name}_${name}`;
v8Profiler.startProfiling(profileName, true);
const { histogram, thrownError } = await testPerformance(bson, [run, setupResult], iterations);
if (thrownError != null) {
console.log(
`${bson.name.padEnd(14, ' ')} - v ${bson.version.padEnd(8, ' ')} - error ${thrownError}`
);
} else {
console.log(
`${bson.name.padEnd(14, ' ')} - v ${bson.version.padEnd(8, ' ')} - avg ${perf}ms`
);
printHistogram(`${chalk.greenBright(bson.name)} - ${chalk.blue(name)}`, histogram);
}
const profile = v8Profiler.stopProfiling(profileName);
const result = await promisify(profile.export.bind(profile))();
await writeFile(`${profileName}.cpuprofile`, result);
profile.delete();
}

console.log();
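For context on the measurement change above: the updated testPerformance() replaces manual performance.now() averaging with a perf_hooks RecordableHistogram. Below is a minimal standalone sketch of that pattern, an illustration only and not part of this PR; the JSON.parse/stringify workload is just a stand-in for the function under test. Each recordDelta() call records the nanoseconds elapsed since the previous call, so bracketing the work with two calls records its duration, and the histogram exposes min/max/mean/stddev and percentile() in nanoseconds (hence the divide-by-1e6 in printHistogram above).

```js
import { createHistogram } from 'perf_hooks';

const histogram = createHistogram();
for (let i = 0; i < 100; i++) {
  histogram.recordDelta(); // delta since previous call (the tiny gap between iterations)
  JSON.parse(JSON.stringify({ value: i })); // stand-in for the work under test
  histogram.recordDelta(); // delta since previous call (the duration of the work above)
}

// Histogram values are nanoseconds; divide by 1e6 for milliseconds.
console.log('mean', (histogram.mean / 1e6).toFixed(3), 'ms');
console.log('p99 ', (histogram.percentile(99) / 1e6).toFixed(3), 'ms');
```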
43 changes: 37 additions & 6 deletions etc/benchmarks/main.mjs
@@ -2,15 +2,15 @@
import { performance } from 'perf_hooks';
import { runner, systemInfo, getCurrentLocalBSON } from './lib_runner.mjs';

const iterations = 1_000_000;
const iterations = 100;
const startedEntireRun = performance.now();
console.log(systemInfo(iterations));
console.log();

////////////////////////////////////////////////////////////////////////////////////////////////////
await runner({
skip: true,
name: 'deserialize({ oid, string }, { validation: { utf8: false } })',
name: 'deserialize an objectId and a string without utf8 validation',
iterations,
setup(libs) {
const bson = getCurrentLocalBSON(libs);
@@ -28,7 +28,7 @@ await runner({
////////////////////////////////////////////////////////////////////////////////////////////////////
await runner({
skip: true,
name: 'new Oid(buf)',
name: 'objectId constructor',
iterations,
setup() {
return Buffer.from('00'.repeat(12), 'hex');
@@ -40,7 +40,7 @@ await runner({
////////////////////////////////////////////////////////////////////////////////////////////////////
await runner({
skip: true,
name: 'BSON.deserialize(largeDocument)',
name: 'deserialize a large document',
iterations,
setup(libs) {
const bson = getCurrentLocalBSON(libs);
@@ -68,7 +68,7 @@ await runner({
});

await runner({
skip: false,
skip: true,
name: 'Double Deserialization',
iterations,
setup(libs) {
@@ -81,7 +81,7 @@ await runner({
});

await runner({
skip: false,
skip: true,
name: 'Many Doubles Deserialization',
iterations,
setup(libs) {
@@ -98,6 +98,37 @@ await runner({
}
});

/// Simulates a cursor nextBatch full of user documents, each with 20 strings of ~20 characters
/// nextBatch: [ { string * 20 } * 1000 ] /// Garbage call
await runner({
skip: false,
name: 'deserialize a large batch of documents each with an array of many strings',
iterations,
setup(libs) {
const bson = libs[0].lib;
return bson.serialize({
nextBatch: Array.from({ length: 1000 }, () => ({
_id: new bson.ObjectId(),
arrayField: Array.from({ length: 20 }, (_, i) => '5e99f3f5d3ab06936d36000' + i)
}))
});
},
async run(i, bson, document) {
await Promise.all(
Array.from(
{ length: 100 },
(_, i) =>
new Promise(resolve => {
setTimeout(() => {
resolve(bson.lib.deserialize(document, { validation: { utf8: false } }));
}, 20);
})
)
Comment on lines +122 to +127

Contributor:
What is the purpose of the setTimeout here? And why did you choose a setTimeout of 20ms specifically?

Contributor Author:
It manufactures the delay the driver incurs when we Promise.all many finds; 20ms is arbitrary, kept low so the bench runs in a reasonable amount of time.
);
}
});
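To make the review exchange above concrete: a minimal sketch of the driver-side pattern this benchmark approximates, firing many finds in parallel so that many BSON nextBatch payloads come back and are deserialized close together; the benchmark's setTimeout(..., 20) stands in for that network/server latency. This is an illustration only, not part of the PR; the URI, database, and collection names are made up, and it assumes the standard MongoDB Node.js driver.

```js
import { MongoClient } from 'mongodb'; // assumption: the standard MongoDB Node.js driver

// Fire 100 finds concurrently; their responses arrive as BSON nextBatch
// payloads that the driver deserializes back to back, which is the hot path
// the benchmark above models.
async function manyParallelFinds(uri = 'mongodb://localhost:27017') {
  const client = await MongoClient.connect(uri);
  const users = client.db('bench').collection('users'); // illustrative names
  try {
    const batches = await Promise.all(
      Array.from({ length: 100 }, () => users.find({}).limit(1000).toArray())
    );
    return batches.length;
  } finally {
    await client.close();
  }
}
```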

// End
console.log(
'Total time taken to benchmark:',