Skip to content

Commit c6e67fc

Browse files
committed
Use a standard HashMap instead of one based on SHA1
It's faster throughout the board. ``` ❯ cargo bench -p [email protected] --bench edit-tree Compiling gix-object v0.44.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-object) Compiling gix-pack v0.53.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-pack) Compiling gix-odb v0.63.0 (/Users/byron/dev/github.com/Byron/gitoxide/gix-odb) Finished `bench` profile [optimized] target(s) in 5.97s Running benches/edit_tree.rs (target/release/deps/edit_tree-6af6651a1c453a05) Gnuplot not found, using plotters backend editor/small tree (empty -> full -> empty) time: [2.5972 µs 2.6019 µs 2.6075 µs] thrpt: [3.8351 Melem/s 3.8434 Melem/s 3.8503 Melem/s] change: time: [-32.618% -32.355% -32.038%] (p = 0.00 < 0.05) thrpt: [+47.142% +47.831% +48.409%] Performance has improved. Found 14 outliers among 100 measurements (14.00%) 13 (13.00%) high mild 1 (1.00%) high severe editor/deeply nested tree (empty -> full -> empty) time: [8.2019 µs 8.2079 µs 8.2145 µs] thrpt: [5.5998 Melem/s 5.6043 Melem/s 5.6084 Melem/s] change: time: [-33.517% -33.377% -33.246%] (p = 0.00 < 0.05) thrpt: [+49.804% +50.099% +50.415%] Performance has improved. Found 13 outliers among 100 measurements (13.00%) 8 (8.00%) high mild 5 (5.00%) high severe cursor/small tree (empty -> full -> empty) time: [2.6911 µs 2.6935 µs 2.6961 µs] thrpt: [3.7090 Melem/s 3.7127 Melem/s 3.7160 Melem/s] change: time: [-33.881% -33.546% -33.225%] (p = 0.00 < 0.05) thrpt: [+49.757% +50.480% +51.242%] Performance has improved. Found 14 outliers among 100 measurements (14.00%) 4 (4.00%) high mild 10 (10.00%) high severe cursor/deeply nested tree (empty -> full -> empty) time: [1.3616 µs 1.3631 µs 1.3649 µs] thrpt: [33.703 Melem/s 33.747 Melem/s 33.783 Melem/s] change: time: [-40.063% -39.675% -39.234%] (p = 0.00 < 0.05) thrpt: [+64.566% +65.769% +66.843%] Performance has improved. Found 20 outliers among 100 measurements (20.00%) 18 (18.00%) high mild 2 (2.00%) high severe ```
1 parent a3acb76 commit c6e67fc

File tree

2 files changed

+10
-19
lines changed

2 files changed

+10
-19
lines changed

gix-object/src/tree/editor.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use crate::tree::{Editor, EntryKind};
22
use crate::{tree, Tree};
33
use bstr::{BStr, BString, ByteSlice, ByteVec};
44
use gix_hash::ObjectId;
5-
use gix_hashtable::hash_map::Entry;
65
use std::cmp::Ordering;
6+
use std::collections::{hash_map, HashMap};
77

88
/// A way to constrain all [tree-edits](Editor) to a given subtree.
99
pub struct Cursor<'a, 'find> {
@@ -24,7 +24,7 @@ impl<'a> Editor<'a> {
2424
Editor {
2525
find,
2626
object_hash,
27-
trees: gix_hashtable::HashMap::from_iter(Some((empty_path_hash(), root))),
27+
trees: HashMap::from_iter(Some((empty_path(), root))),
2828
path_buf: Vec::with_capacity(256).into(),
2929
tree_buf: Vec::with_capacity(512),
3030
}
@@ -259,8 +259,8 @@ impl<'a> Editor<'a> {
259259
push_path_component(&mut self.path_buf, name);
260260
let path_id = path_hash(&self.path_buf);
261261
cursor = match self.trees.entry(path_id) {
262-
Entry::Occupied(e) => e.into_mut(),
263-
Entry::Vacant(e) => e.insert(
262+
hash_map::Entry::Occupied(e) => e.into_mut(),
263+
hash_map::Entry::Vacant(e) => e.insert(
264264
if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) {
265265
self.find.find_tree(&tree_id, &mut self.tree_buf)?.into()
266266
} else {
@@ -279,7 +279,7 @@ impl<'a> Editor<'a> {
279279
/// This is useful if the same editor is re-used for various trees.
280280
pub fn set_root(&mut self, root: Tree) -> &mut Self {
281281
self.trees.clear();
282-
self.trees.insert(empty_path_hash(), root);
282+
self.trees.insert(empty_path(), root);
283283
self
284284
}
285285
}
@@ -387,14 +387,12 @@ fn filename(path: &BStr) -> &BStr {
387387
path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..])
388388
}
389389

390-
fn empty_path_hash() -> ObjectId {
391-
gix_features::hash::hasher(gix_hash::Kind::Sha1).digest().into()
390+
fn empty_path() -> BString {
391+
BString::default()
392392
}
393393

394-
fn path_hash(path: &[u8]) -> ObjectId {
395-
let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
396-
hasher.update(path);
397-
hasher.digest().into()
394+
fn path_hash(path: &[u8]) -> BString {
395+
path.to_vec().into()
398396
}
399397

400398
fn push_path_component(base: &mut BString, component: &[u8]) -> usize {

gix-object/src/tree/mod.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use crate::{
22
bstr::{BStr, BString},
33
tree, Tree,
44
};
5-
use gix_hash::ObjectId;
65
use std::cmp::Ordering;
76

87
///
@@ -19,12 +18,6 @@ pub mod write;
1918
///
2019
/// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well
2120
/// with some penalties.
22-
///
23-
/// ### Note
24-
///
25-
/// For reasons of efficiency, internally a SHA1 based hashmap is used to avoid having to store full paths
26-
/// to each edited tree. The chance of collision is low, but could be engineered to overwrite or write into
27-
/// an unintended tree.
2821
#[doc(alias = "TreeUpdateBuilder", alias = "git2")]
2922
#[derive(Clone)]
3023
pub struct Editor<'a> {
@@ -35,7 +28,7 @@ pub struct Editor<'a> {
3528
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
3629
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
3730
/// dropped when writing at the latest.
38-
trees: gix_hashtable::HashMap<ObjectId, Tree>,
31+
trees: std::collections::HashMap<BString, Tree>,
3932
/// A buffer to build up paths when finding the tree to edit.
4033
path_buf: BString,
4134
/// Our buffer for storing tree-data in, right before decoding it.

0 commit comments

Comments
 (0)