Skip to content

Commit f0e9ee1

Browse files
committed
change!: move the hashing API to gix_hash
This will soon not be dependent on any feature configuration. I wasn’t sure of the ideal crate to put this in, but after checking reverse dependencies on crates.io, it seems like there’s essentially no user of `gix-hash` that wouldn’t be pulling in a hashing implementation anyway, so I think this is a fine and logical place for it to be. I named the module `gix_hash::hasher` since `gix_hash::hash` seemed like it would be confusing. This does mean that there is a function and module with the same name, which is permitted but perhaps a little strange. Everything is re‐exported directly other than `gix_features::hash::Write`, which moves along with the I/O convenience functions into a new public submodule and becomes `gix_hash::hasher::io::Write`, as that seems like a clearer name to me, being akin to the `gix_hash::hasher` function but as an `std::io::Write` wrapper.
1 parent 31c3c78 commit f0e9ee1

File tree

24 files changed

+174
-167
lines changed

24 files changed

+174
-167
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-commitgraph/src/file/verify.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,12 @@ impl File {
140140
///
141141
/// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch.
142142
pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, (gix_hash::ObjectId, gix_hash::ObjectId)> {
143-
// Even though we could use gix_features::hash::bytes_of_file(…), this would require using our own
143+
// Even though we could use gix_hash::bytes_of_file(…), this would require using our own
144144
// Error type to support io::Error and Mismatch. As we only gain progress, there probably isn't much value
145145
// as these files are usually small enough to process them in less than a second, even for the large ones.
146146
// But it's possible, once a progress instance is passed.
147147
let data_len_without_trailer = self.data.len() - self.hash_len;
148-
let mut hasher = gix_features::hash::hasher(self.object_hash());
148+
let mut hasher = gix_hash::hasher(self.object_hash());
149149
hasher.update(&self.data[..data_len_without_trailer]);
150150
let actual = gix_hash::ObjectId::from_bytes_or_panic(hasher.digest().as_ref());
151151

gix-features/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ path = "tests/pipe.rs"
121121
required-features = ["io-pipe"]
122122

123123
[dependencies]
124-
gix-hash = { version = "^0.16.0", path = "../gix-hash" }
125124
gix-trace = { version = "^0.1.12", path = "../gix-trace" }
126125

127126
# for walkdir

gix-features/src/hash.rs

Lines changed: 0 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -71,132 +71,3 @@ pub fn crc32(bytes: &[u8]) -> u32 {
7171
h.update(bytes);
7272
h.finalize()
7373
}
74-
75-
/// Produce a hasher suitable for the given kind of hash.
76-
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
77-
pub fn hasher(kind: gix_hash::Kind) -> Hasher {
78-
match kind {
79-
gix_hash::Kind::Sha1 => Hasher::default(),
80-
}
81-
}
82-
83-
/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
84-
/// while initializing and calling `progress`.
85-
///
86-
/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
87-
/// denoting the amount of bytes to hash starting from the beginning of the file.
88-
///
89-
/// # Note
90-
///
91-
/// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the
92-
/// [`gix_hash::ObjectId`] return value.
93-
/// * [Interrupts][crate::interrupt] are supported.
94-
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
95-
pub fn bytes_of_file(
96-
path: &std::path::Path,
97-
num_bytes_from_start: u64,
98-
kind: gix_hash::Kind,
99-
progress: &mut dyn crate::progress::Progress,
100-
should_interrupt: &std::sync::atomic::AtomicBool,
101-
) -> std::io::Result<gix_hash::ObjectId> {
102-
bytes(
103-
&mut std::fs::File::open(path)?,
104-
num_bytes_from_start,
105-
kind,
106-
progress,
107-
should_interrupt,
108-
)
109-
}
110-
111-
/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
112-
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
113-
pub fn bytes(
114-
read: &mut dyn std::io::Read,
115-
num_bytes_from_start: u64,
116-
kind: gix_hash::Kind,
117-
progress: &mut dyn crate::progress::Progress,
118-
should_interrupt: &std::sync::atomic::AtomicBool,
119-
) -> std::io::Result<gix_hash::ObjectId> {
120-
bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
121-
}
122-
123-
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
124-
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
125-
pub fn bytes_with_hasher(
126-
read: &mut dyn std::io::Read,
127-
num_bytes_from_start: u64,
128-
mut hasher: Hasher,
129-
progress: &mut dyn crate::progress::Progress,
130-
should_interrupt: &std::sync::atomic::AtomicBool,
131-
) -> std::io::Result<gix_hash::ObjectId> {
132-
let start = std::time::Instant::now();
133-
// init progress before the possibility for failure, as convenience in case people want to recover
134-
progress.init(
135-
Some(num_bytes_from_start as prodash::progress::Step),
136-
crate::progress::bytes(),
137-
);
138-
139-
const BUF_SIZE: usize = u16::MAX as usize;
140-
let mut buf = [0u8; BUF_SIZE];
141-
let mut bytes_left = num_bytes_from_start;
142-
143-
while bytes_left > 0 {
144-
let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
145-
read.read_exact(out)?;
146-
bytes_left -= out.len() as u64;
147-
progress.inc_by(out.len());
148-
hasher.update(out);
149-
if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
150-
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
151-
}
152-
}
153-
154-
let id = gix_hash::ObjectId::from(hasher.digest());
155-
progress.show_throughput(start);
156-
Ok(id)
157-
}
158-
159-
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
160-
mod write {
161-
use crate::hash::Hasher;
162-
163-
/// A utility to automatically generate a hash while writing into an inner writer.
164-
pub struct Write<T> {
165-
/// The hash implementation.
166-
pub hash: Hasher,
167-
/// The inner writer.
168-
pub inner: T,
169-
}
170-
171-
impl<T> std::io::Write for Write<T>
172-
where
173-
T: std::io::Write,
174-
{
175-
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
176-
let written = self.inner.write(buf)?;
177-
self.hash.update(&buf[..written]);
178-
Ok(written)
179-
}
180-
181-
fn flush(&mut self) -> std::io::Result<()> {
182-
self.inner.flush()
183-
}
184-
}
185-
186-
impl<T> Write<T>
187-
where
188-
T: std::io::Write,
189-
{
190-
/// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`.
191-
pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
192-
match object_hash {
193-
gix_hash::Kind::Sha1 => Write {
194-
inner,
195-
hash: Hasher::default(),
196-
},
197-
}
198-
}
199-
}
200-
}
201-
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
202-
pub use write::Write;

gix-hash/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ test = false
2020
serde = ["dep:serde"]
2121

2222
[dependencies]
23+
gix-features = { version = "^0.40.0", path = "../gix-features", features = ["rustsha1", "progress"] }
24+
2325
thiserror = "2.0.0"
2426
faster-hex = { version = "0.9.0" }
2527
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }

gix-hash/src/hasher/io.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use crate::{hasher, Hasher};
2+
3+
/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
4+
/// while initializing and calling `progress`.
5+
///
6+
/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
7+
/// denoting the amount of bytes to hash starting from the beginning of the file.
8+
///
9+
/// # Note
10+
///
11+
/// * [Interrupts][gix_features::interrupt] are supported.
12+
pub fn bytes_of_file(
13+
path: &std::path::Path,
14+
num_bytes_from_start: u64,
15+
kind: crate::Kind,
16+
progress: &mut dyn gix_features::progress::Progress,
17+
should_interrupt: &std::sync::atomic::AtomicBool,
18+
) -> std::io::Result<crate::ObjectId> {
19+
bytes(
20+
&mut std::fs::File::open(path)?,
21+
num_bytes_from_start,
22+
kind,
23+
progress,
24+
should_interrupt,
25+
)
26+
}
27+
28+
/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
29+
pub fn bytes(
30+
read: &mut dyn std::io::Read,
31+
num_bytes_from_start: u64,
32+
kind: crate::Kind,
33+
progress: &mut dyn gix_features::progress::Progress,
34+
should_interrupt: &std::sync::atomic::AtomicBool,
35+
) -> std::io::Result<crate::ObjectId> {
36+
bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
37+
}
38+
39+
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
40+
pub fn bytes_with_hasher(
41+
read: &mut dyn std::io::Read,
42+
num_bytes_from_start: u64,
43+
mut hasher: Hasher,
44+
progress: &mut dyn gix_features::progress::Progress,
45+
should_interrupt: &std::sync::atomic::AtomicBool,
46+
) -> std::io::Result<crate::ObjectId> {
47+
let start = std::time::Instant::now();
48+
// init progress before the possibility for failure, as a convenience in case people want to recover
49+
progress.init(
50+
Some(num_bytes_from_start as gix_features::progress::prodash::progress::Step),
51+
gix_features::progress::bytes(),
52+
);
53+
54+
const BUF_SIZE: usize = u16::MAX as usize;
55+
let mut buf = [0u8; BUF_SIZE];
56+
let mut bytes_left = num_bytes_from_start;
57+
58+
while bytes_left > 0 {
59+
let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
60+
read.read_exact(out)?;
61+
bytes_left -= out.len() as u64;
62+
progress.inc_by(out.len());
63+
hasher.update(out);
64+
if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
65+
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
66+
}
67+
}
68+
69+
let id = crate::ObjectId::from(hasher.digest());
70+
progress.show_throughput(start);
71+
Ok(id)
72+
}
73+
74+
/// A utility to automatically generate a hash while writing into an inner writer.
75+
pub struct Write<T> {
76+
/// The hash implementation.
77+
pub hash: Hasher,
78+
/// The inner writer.
79+
pub inner: T,
80+
}
81+
82+
impl<T> std::io::Write for Write<T>
83+
where
84+
T: std::io::Write,
85+
{
86+
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
87+
let written = self.inner.write(buf)?;
88+
self.hash.update(&buf[..written]);
89+
Ok(written)
90+
}
91+
92+
fn flush(&mut self) -> std::io::Result<()> {
93+
self.inner.flush()
94+
}
95+
}
96+
97+
impl<T> Write<T>
98+
where
99+
T: std::io::Write,
100+
{
101+
/// Create a new hash writer which hashes all bytes written to `inner` with a hash of the kind given by `object_hash`.
102+
pub fn new(inner: T, object_hash: crate::Kind) -> Self {
103+
match object_hash {
104+
crate::Kind::Sha1 => Write {
105+
inner,
106+
hash: Hasher::default(),
107+
},
108+
}
109+
}
110+
}

gix-hash/src/hasher/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/// An implementation of the Sha1 hash, which can be used once.
2+
#[derive(Default, Clone)]
3+
pub struct Hasher(gix_features::hash::Hasher);
4+
5+
impl Hasher {
6+
/// Digest the given `bytes`.
7+
pub fn update(&mut self, bytes: &[u8]) {
8+
self.0.update(bytes);
9+
}
10+
/// Finalize the hash and produce a digest.
11+
pub fn digest(self) -> gix_features::hash::Digest {
12+
self.0.digest()
13+
}
14+
}
15+
16+
/// Produce a hasher suitable for the given kind of hash.
17+
pub fn hasher(kind: crate::Kind) -> Hasher {
18+
match kind {
19+
crate::Kind::Sha1 => Hasher::default(),
20+
}
21+
}
22+
23+
/// Hashing utilities for I/O operations.
24+
pub mod io;

gix-hash/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
mod borrowed;
1414
pub use borrowed::{oid, Error};
1515

16+
///
17+
pub mod hasher;
18+
pub use hasher::io::{bytes, bytes_of_file, bytes_with_hasher};
19+
pub use hasher::{hasher, Hasher};
20+
1621
mod object_id;
1722
pub use object_id::{decode, ObjectId};
1823

gix-hash/tests/object_id/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ mod from_hex {
4545
mod sha1 {
4646
use std::str::FromStr as _;
4747

48-
use gix_features::hash::hasher;
49-
use gix_hash::{Kind, ObjectId};
48+
use gix_hash::{hasher, Kind, ObjectId};
5049

5150
fn hash_contents(s: &[u8]) -> ObjectId {
5251
let mut hasher = hasher(Kind::Sha1);

gix-index/src/extension/end_of_index_entry/decode.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option<usize> {
3333
return None;
3434
}
3535

36-
let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
36+
let mut hasher = gix_hash::hasher(gix_hash::Kind::Sha1);
3737
let mut last_chunk = None;
3838
for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - MIN_SIZE_WITH_HEADER - hash_len]) {
3939
hasher.update(&signature);

gix-index/src/extension/end_of_index_entry/write.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub fn write_to(
1818

1919
out.write_all(&offset_to_extensions.to_be_bytes())?;
2020

21-
let mut hasher = gix_features::hash::hasher(hash_kind);
21+
let mut hasher = gix_hash::hasher(hash_kind);
2222
for (signature, size) in prior_extensions {
2323
hasher.update(&signature);
2424
hasher.update(&size.to_be_bytes());

gix-index/src/file/init.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ impl File {
7575
let _span = gix_features::trace::detail!("gix::open_index::hash_index", path = ?path);
7676
let meta = file.metadata()?;
7777
let num_bytes_to_hash = meta.len() - object_hash.len_in_bytes() as u64;
78-
let actual_hash = gix_features::hash::bytes(
78+
let actual_hash = gix_hash::bytes(
7979
&mut file,
8080
num_bytes_to_hash,
8181
object_hash,

gix-index/src/file/verify.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ impl File {
2525
if let Some(checksum) = self.checksum {
2626
let num_bytes_to_hash = self.path.metadata()?.len() - checksum.as_bytes().len() as u64;
2727
let should_interrupt = AtomicBool::new(false);
28-
let actual = gix_features::hash::bytes_of_file(
28+
let actual = gix_hash::bytes_of_file(
2929
&self.path,
3030
num_bytes_to_hash,
3131
checksum.kind(),

gix-index/src/file/write.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use gix_features::hash;
1+
use gix_hash::hasher;
22

33
use crate::{write, File, Version};
44

@@ -28,7 +28,7 @@ impl File {
2828
let version = self.state.write_to(out, options)?;
2929
(version, self.state.object_hash.null())
3030
} else {
31-
let mut hasher = hash::Write::new(&mut out, self.state.object_hash);
31+
let mut hasher = hasher::io::Write::new(&mut out, self.state.object_hash);
3232
let out: &mut dyn std::io::Write = &mut hasher;
3333
let version = self.state.write_to(out, options)?;
3434
(version, gix_hash::ObjectId::from(hasher.hash.digest()))

gix-object/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,8 @@ pub mod decode {
398398
}
399399
}
400400

401-
fn object_hasher(hash_kind: gix_hash::Kind, object_kind: Kind, object_size: u64) -> gix_features::hash::Hasher {
402-
let mut hasher = gix_features::hash::hasher(hash_kind);
401+
fn object_hasher(hash_kind: gix_hash::Kind, object_kind: Kind, object_size: u64) -> gix_hash::Hasher {
402+
let mut hasher = gix_hash::hasher(hash_kind);
403403
hasher.update(&encode::loose_header(object_kind, object_size));
404404
hasher
405405
}
@@ -426,5 +426,5 @@ pub fn compute_stream_hash(
426426
should_interrupt: &std::sync::atomic::AtomicBool,
427427
) -> std::io::Result<gix_hash::ObjectId> {
428428
let hasher = object_hasher(hash_kind, object_kind, stream_len);
429-
gix_features::hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt)
429+
gix_hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt)
430430
}

0 commit comments

Comments
 (0)