Skip to content

feat: add option to traverse commits from oldest to newest #1610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ fn run(args: Args) -> anyhow::Result<()> {
Sorting::BreadthFirst
} else {
// else if args.newest_first {
Sorting::ByCommitTimeNewestFirst
Sorting::ByCommitTime(Default::default())
};

let mut min_parents = args.min_parents.unwrap_or(0);
Expand Down
2 changes: 1 addition & 1 deletion gitoxide-core/src/repository/commitgraph/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub(crate) mod function {
.context("Need committish as starting point")?
.id()
.ancestors()
.sorting(Sorting::ByCommitTimeNewestFirst)
.sorting(Sorting::ByCommitTime(Default::default()))
.all()?;
for commit in commits {
let commit = commit?;
Expand Down
2 changes: 1 addition & 1 deletion gitoxide-core/src/repository/revision/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub(crate) mod function {
.context("Need committish as starting point")?
.id()
.ancestors()
.sorting(Sorting::ByCommitTimeNewestFirst)
.sorting(Sorting::ByCommitTime(Default::default()))
.all()?;

let mut vg = match text {
Expand Down
88 changes: 60 additions & 28 deletions gix-traverse/src/commit/simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,20 @@ use gix_date::SecondsSinceUnixEpoch;
use gix_hash::ObjectId;
use gix_hashtable::HashSet;
use smallvec::SmallVec;
use std::cmp::Reverse;
use std::collections::VecDeque;

#[derive(Default, Debug, Copy, Clone)]
/// The order with which to prioritize the search.
pub enum CommitTimeOrder {
#[default]
/// Sort commits by newest first.
NewestFirst,
/// Sort commits by oldest first.
#[doc(alias = "Sort::REVERSE", alias = "git2")]
OldestFirst,
}

/// Specify how to sort commits during a [simple](super::Simple) traversal.
///
/// ### Sample History
Expand All @@ -20,32 +32,35 @@ use std::collections::VecDeque;
pub enum Sorting {
/// Commits are sorted as they are mentioned in the commit graph.
///
/// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`
/// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`.
///
/// ### Note
///
/// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
/// as it avoids overlapping branches.
#[default]
BreadthFirst,
/// Commits are sorted by their commit time in descending order, that is newest first.
/// Commits are sorted by their commit time in the order specified, either newest or oldest first.
///
/// The sorting applies to all currently queued commit ids and thus is full.
///
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for [`NewestFirst`](CommitTimeOrder::NewestFirst),
/// or `1, 2, 3, 4, 5, 6, 7, 8` for [`OldestFirst`](CommitTimeOrder::OldestFirst).
///
/// # Performance
///
/// This mode benefits greatly from having an object_cache in `find()`
/// to avoid having to lookup each commit twice.
ByCommitTimeNewestFirst,
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
ByCommitTime(CommitTimeOrder),
/// This sorting is similar to [`ByCommitTime`](Sorting::ByCommitTime), but adds a cutoff to not return commits older than
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
///
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
///
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
ByCommitTimeNewestFirstCutoffOlderThan {
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`.
ByCommitTimeCutoff {
/// The order in which to prioritize lookups.
order: CommitTimeOrder,
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
seconds: gix_date::SecondsSinceUnixEpoch,
},
Expand All @@ -61,11 +76,14 @@ pub enum Error {
ObjectDecode(#[from] gix_object::decode::Error),
}

use Result as Either;
type QueueKey<T> = Either<T, Reverse<T>>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a first for me to see this usage, and I kind of like it :).
It really does save a lot of boilerplate as well!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was the idea yes, also just wanted to make it clear that this is not an error condition.


/// The state used and potentially shared by multiple graph traversals.
#[derive(Clone)]
pub(super) struct State {
next: VecDeque<ObjectId>,
queue: gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, ObjectId>,
queue: gix_revwalk::PriorityQueue<QueueKey<SecondsSinceUnixEpoch>, ObjectId>,
buf: Vec<u8>,
seen: HashSet<ObjectId>,
parents_buf: Vec<u8>,
Expand All @@ -77,10 +95,13 @@ mod init {
use gix_date::SecondsSinceUnixEpoch;
use gix_hash::{oid, ObjectId};
use gix_object::{CommitRefIter, FindExt};
use std::cmp::Reverse;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Creative!

use Err as Oldest;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I never dared!

use Ok as Newest;

use super::{
super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple},
collect_parents, Error, State,
collect_parents, CommitTimeOrder, Error, State,
};

impl Default for State {
Expand All @@ -105,6 +126,13 @@ mod init {
}
}

fn to_queue_key(i: i64, order: CommitTimeOrder) -> super::QueueKey<i64> {
match order {
CommitTimeOrder::NewestFirst => Newest(i),
CommitTimeOrder::OldestFirst => Oldest(Reverse(i)),
}
}

/// Builder
impl<Find, Predicate> Simple<Find, Predicate>
where
Expand All @@ -117,19 +145,20 @@ mod init {
Sorting::BreadthFirst => {
self.queue_to_vecdeque();
}
Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => {
Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => {
let cutoff_time = self.sorting.cutoff_time();
let state = &mut self.state;
for commit_id in state.next.drain(..) {
let commit_iter = self.objects.find_commit_iter(&commit_id, &mut state.buf)?;
let time = commit_iter.committer()?.time.seconds;
match cutoff_time {
Some(cutoff_time) if time >= cutoff_time => {
state.queue.insert(time, commit_id);
let key = to_queue_key(time, order);
match (cutoff_time, order) {
(Some(cutoff_time), _) if time >= cutoff_time => {
state.queue.insert(key, commit_id);
}
Some(_) => {}
None => {
state.queue.insert(time, commit_id);
(Some(_), _) => {}
(None, _) => {
state.queue.insert(key, commit_id);
}
}
}
Expand Down Expand Up @@ -254,10 +283,8 @@ mod init {
} else {
match self.sorting {
Sorting::BreadthFirst => self.next_by_topology(),
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
self.next_by_commit_date(seconds.into())
}
Sorting::ByCommitTime(order) => self.next_by_commit_date(order, None),
Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()),
}
}
}
Expand All @@ -267,7 +294,7 @@ mod init {
/// If not topo sort, provide the cutoff date if present.
fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
match self {
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(*seconds),
Sorting::ByCommitTimeCutoff { seconds, .. } => Some(*seconds),
_ => None,
}
}
Expand All @@ -281,18 +308,21 @@ mod init {
{
fn next_by_commit_date(
&mut self,
cutoff_older_than: Option<SecondsSinceUnixEpoch>,
order: CommitTimeOrder,
cutoff: Option<SecondsSinceUnixEpoch>,
) -> Option<Result<Info, Error>> {
let state = &mut self.state;

let (commit_time, oid) = state.queue.pop()?;
let (commit_time, oid) = match state.queue.pop()? {
(Newest(t) | Oldest(Reverse(t)), o) => (t, o),
};
let mut parents: ParentIds = Default::default();
match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
Ok(Either::CachedCommit(commit)) => {
if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
// drop corrupt caches and try again with ODB
self.cache = None;
return self.next_by_commit_date(cutoff_older_than);
return self.next_by_commit_date(order, cutoff);
}
for (id, parent_commit_time) in state.parent_ids.drain(..) {
parents.push(id);
Expand All @@ -301,9 +331,10 @@ mod init {
continue;
}

match cutoff_older_than {
let key = to_queue_key(parent_commit_time, order);
match cutoff {
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
Some(_) | None => state.queue.insert(parent_commit_time, id),
Some(_) | None => state.queue.insert(key, id),
}
}
}
Expand All @@ -323,9 +354,10 @@ mod init {
.and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
.unwrap_or_default();

match cutoff_older_than {
let time = to_queue_key(parent_commit_time, order);
match cutoff {
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
Some(_) | None => state.queue.insert(parent_commit_time, id),
Some(_) | None => state.queue.insert(time, id),
}
}
Ok(_unused_token) => break,
Expand Down
Loading
Loading