Skip to content

Commit 4cef57d

Browse files
committed
Add corpus --dry-run and --task-sql-suffix and --repo-sql-suffix
1 parent 0f973ac commit 4cef57d

File tree

6 files changed

+131
-56
lines changed

6 files changed

+131
-56
lines changed

gitoxide-core/src/corpus/db.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,25 @@ impl Engine {
173173
.con
174174
.query_row(
175175
"INSERT INTO gitoxide_version (version) VALUES (?1) ON CONFLICT DO UPDATE SET version = version RETURNING id",
176-
[&self.gitoxide_version],
176+
[&self.state.gitoxide_version],
177177
|r| r.get(0),
178178
)?)
179179
}
180-
pub(crate) fn tasks_or_insert(&self) -> anyhow::Result<Vec<(Id, &'static super::Task)>> {
181-
let mut out: Vec<_> = super::run::ALL.iter().map(|task| (0, task)).collect();
180+
pub(crate) fn tasks_or_insert(
181+
&self,
182+
allowed_short_names: &[String],
183+
) -> anyhow::Result<Vec<(Id, &'static super::Task)>> {
184+
let mut out: Vec<_> = super::run::ALL
185+
.iter()
186+
.filter(|task| {
187+
if allowed_short_names.is_empty() {
188+
true
189+
} else {
190+
allowed_short_names.iter().any(|allowed| task.short_name == allowed)
191+
}
192+
})
193+
.map(|task| (0, task))
194+
.collect();
182195
for (id, task) in &mut out {
183196
*id = self.con.query_row(
184197
"INSERT INTO task (short_name, description) VALUES (?1, ?2) ON CONFLICT DO UPDATE SET short_name = short_name, description = ?2 RETURNING id",

gitoxide-core/src/corpus/engine.rs

Lines changed: 82 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,40 +10,47 @@ use std::path::{Path, PathBuf};
1010
use std::sync::atomic::Ordering;
1111
use std::time::{Duration, Instant};
1212

13+
pub type ProgressItem = gix::progress::DoOrDiscard<gix::progress::prodash::tree::Item>;
14+
15+
pub struct State {
16+
pub progress: ProgressItem,
17+
pub gitoxide_version: String,
18+
pub trace_to_progress: bool,
19+
pub reverse_trace_lines: bool,
20+
}
21+
1322
impl Engine {
1423
/// Open the corpus DB or create it.
15-
pub fn open_or_create(
16-
db: PathBuf,
17-
gitoxide_version: String,
18-
progress: corpus::Progress,
19-
trace_to_progress: bool,
20-
reverse_trace_lines: bool,
21-
) -> anyhow::Result<Engine> {
24+
pub fn open_or_create(db: PathBuf, state: State) -> anyhow::Result<Engine> {
2225
let con = crate::corpus::db::create(db).context("Could not open or create database")?;
23-
Ok(Engine {
24-
progress,
25-
con,
26-
gitoxide_version,
27-
trace_to_progress,
28-
reverse_trace_lines,
29-
})
26+
Ok(Engine { con, state })
3027
}
3128

3229
/// Run on the existing set of repositories we have already seen, or obtain them from `corpus_path` if there are none yet.
33-
pub fn run(&mut self, corpus_path: PathBuf, threads: Option<usize>) -> anyhow::Result<()> {
30+
pub fn run(
31+
&mut self,
32+
corpus_path: PathBuf,
33+
threads: Option<usize>,
34+
dry_run: bool,
35+
repo_sql_suffix: Option<String>,
36+
allowed_task_names: Vec<String>,
37+
) -> anyhow::Result<()> {
38+
let tasks = self.tasks_or_insert(&allowed_task_names)?;
39+
if tasks.is_empty() {
40+
bail!("Cannot run without any task to perform on the repositories");
41+
}
3442
let (corpus_path, corpus_id) = self.prepare_corpus_path(corpus_path)?;
3543
let gitoxide_id = self.gitoxide_version_id_or_insert()?;
3644
let runner_id = self.runner_id_or_insert()?;
37-
let repos = self.find_repos_or_insert(&corpus_path, corpus_id)?;
38-
let tasks = self.tasks_or_insert()?;
39-
self.perform_run(&corpus_path, gitoxide_id, runner_id, &tasks, repos, threads)
45+
let repos = self.find_repos_or_insert(&corpus_path, corpus_id, repo_sql_suffix)?;
46+
self.perform_run(&corpus_path, gitoxide_id, runner_id, &tasks, repos, threads, dry_run)
4047
}
4148

4249
pub fn refresh(&mut self, corpus_path: PathBuf) -> anyhow::Result<()> {
4350
let (corpus_path, corpus_id) = self.prepare_corpus_path(corpus_path)?;
4451
let repos = self.refresh_repos(&corpus_path, corpus_id)?;
45-
self.progress.set_name("refresh repos");
46-
self.progress.info(format!(
52+
self.state.progress.set_name("refresh repos");
53+
self.state.progress.info(format!(
4754
"Added or updated {} repositories under {corpus_path:?}",
4855
repos.len()
4956
));
@@ -52,6 +59,7 @@ impl Engine {
5259
}
5360

5461
impl Engine {
62+
#[allow(clippy::too_many_arguments)]
5563
fn perform_run(
5664
&mut self,
5765
corpus_path: &Path,
@@ -60,24 +68,42 @@ impl Engine {
6068
tasks: &[(db::Id, &'static Task)],
6169
mut repos: Vec<db::Repo>,
6270
threads: Option<usize>,
71+
dry_run: bool,
6372
) -> anyhow::Result<()> {
6473
let start = Instant::now();
65-
let task_progress = &mut self.progress;
74+
let task_progress = &mut self.state.progress;
6675
task_progress.set_name("run");
6776
task_progress.init(Some(tasks.len()), gix::progress::count("tasks"));
6877
let threads = gix::parallel::num_threads(threads);
6978
let db_path = self.con.path().expect("opened from path on disk").to_owned();
7079
for (task_id, task) in tasks {
7180
let task_start = Instant::now();
7281
let mut repo_progress = task_progress.add_child(format!("run '{}'", task.short_name));
73-
repo_progress.init(Some(repos.len()), gix::progress::count("repos"));
74-
75-
if task.execute_exclusive || threads == 1 {
82+
if task.execute_exclusive || threads == 1 || dry_run {
83+
if dry_run {
84+
task_progress.set_name("WOULD run");
85+
for repo in &repos {
86+
task_progress.info(format!(
87+
"{}",
88+
repo.path
89+
.strip_prefix(corpus_path)
90+
.expect("corpus contains repo")
91+
.display()
92+
));
93+
task_progress.inc();
94+
}
95+
task_progress.info(format!("with {} tasks", tasks.len()));
96+
for (_, task) in tasks {
97+
task_progress.info(format!("task '{}' ({})", task.description, task.short_name))
98+
}
99+
continue;
100+
}
101+
repo_progress.init(Some(repos.len()), gix::progress::count("repos"));
76102
let mut run_progress = repo_progress.add_child("set later");
77103
let (_guard, current_id) = corpus::trace::override_thread_subscriber(
78104
db_path.as_str(),
79-
self.trace_to_progress.then(|| task_progress.add_child("trace")),
80-
self.reverse_trace_lines,
105+
self.state.trace_to_progress.then(|| task_progress.add_child("trace")),
106+
self.state.reverse_trace_lines,
81107
)?;
82108

83109
for repo in &repos {
@@ -180,13 +206,21 @@ impl Engine {
180206
Ok((corpus_path, corpus_id))
181207
}
182208

183-
fn find_repos(&mut self, corpus_path: &Path, corpus_id: db::Id) -> anyhow::Result<Vec<db::Repo>> {
184-
self.progress.set_name("query db-repos");
185-
self.progress.init(None, gix::progress::count("repos"));
209+
fn find_repos(
210+
&mut self,
211+
corpus_path: &Path,
212+
corpus_id: db::Id,
213+
sql_suffix: Option<&str>,
214+
) -> anyhow::Result<Vec<db::Repo>> {
215+
self.state.progress.set_name("query db-repos");
216+
self.state.progress.init(None, gix::progress::count("repos"));
186217

187218
Ok(self
188219
.con
189-
.prepare("SELECT id, rela_path, odb_size, num_objects, num_references FROM repository WHERE corpus = ?1")?
220+
.prepare(&format!(
221+
"SELECT id, rela_path, odb_size, num_objects, num_references FROM repository WHERE corpus = ?1 {}",
222+
sql_suffix.unwrap_or_default()
223+
))?
190224
.query_map([corpus_id], |r| {
191225
Ok(db::Repo {
192226
id: r.get(0)?,
@@ -196,17 +230,17 @@ impl Engine {
196230
num_references: r.get(4)?,
197231
})
198232
})?
199-
.inspect(|_| self.progress.inc())
233+
.inspect(|_| self.state.progress.inc())
200234
.collect::<Result<_, _>>()?)
201235
}
202236

203237
fn refresh_repos(&mut self, corpus_path: &Path, corpus_id: db::Id) -> anyhow::Result<Vec<db::Repo>> {
204238
let start = Instant::now();
205-
self.progress.set_name("refresh");
206-
self.progress.init(None, gix::progress::count("repos"));
239+
self.state.progress.set_name("refresh");
240+
self.state.progress.init(None, gix::progress::count("repos"));
207241

208242
let repos = std::thread::scope({
209-
let progress = &mut self.progress;
243+
let progress = &mut self.state.progress;
210244
let con = &mut self.con;
211245
|scope| -> anyhow::Result<_> {
212246
let threads = std::thread::available_parallelism()
@@ -278,13 +312,23 @@ impl Engine {
278312
Ok(repos)
279313
}
280314

281-
fn find_repos_or_insert(&mut self, corpus_path: &Path, corpus_id: db::Id) -> anyhow::Result<Vec<db::Repo>> {
315+
fn find_repos_or_insert(
316+
&mut self,
317+
corpus_path: &Path,
318+
corpus_id: db::Id,
319+
sql_suffix: Option<String>,
320+
) -> anyhow::Result<Vec<db::Repo>> {
282321
let start = Instant::now();
283-
let repos = self.find_repos(corpus_path, corpus_id)?;
322+
let repos = self.find_repos(corpus_path, corpus_id, sql_suffix.as_deref())?;
284323
if repos.is_empty() {
285-
self.refresh_repos(corpus_path, corpus_id)
324+
let res = self.refresh_repos(corpus_path, corpus_id);
325+
if sql_suffix.is_some() {
326+
self.find_repos(corpus_path, corpus_id, sql_suffix.as_deref())
327+
} else {
328+
res
329+
}
286330
} else {
287-
self.progress.show_throughput(start);
331+
self.state.progress.show_throughput(start);
288332
Ok(repos)
289333
}
290334
}

gitoxide-core/src/corpus/mod.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
pub const PROGRESS_RANGE: std::ops::RangeInclusive<u8> = 0..=3;
2-
pub(crate) type Progress = gix::progress::DoOrDiscard<gix::progress::prodash::tree::Item>;
32

43
pub struct Engine {
5-
progress: Progress,
64
con: rusqlite::Connection,
7-
gitoxide_version: String,
8-
trace_to_progress: bool,
9-
reverse_trace_lines: bool,
5+
state: engine::State,
106
}
117

128
pub struct RunOutcome {
@@ -15,7 +11,7 @@ pub struct RunOutcome {
1511
}
1612

1713
pub(crate) mod db;
18-
pub(crate) mod engine;
14+
pub mod engine;
1915

2016
/// Contains all information necessary to run a task.
2117
pub(crate) struct Task {

gitoxide-core/src/corpus/run.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ impl Task {
88
&self,
99
run: &mut Run,
1010
repo: &Path,
11-
progress: &mut corpus::Progress,
11+
progress: &mut corpus::engine::ProgressItem,
1212
threads: Option<usize>,
1313
should_interrupt: &AtomicBool,
1414
) {
@@ -26,7 +26,7 @@ pub(crate) trait Execute {
2626
fn execute(
2727
&self,
2828
repo: &Path,
29-
progress: &mut corpus::Progress,
29+
progress: &mut corpus::engine::ProgressItem,
3030
threads: Option<usize>,
3131
should_interrupt: &AtomicBool,
3232
) -> anyhow::Result<()>;
@@ -45,7 +45,7 @@ impl Execute for OpenRepo {
4545
fn execute(
4646
&self,
4747
repo: &Path,
48-
_progress: &mut corpus::Progress,
48+
_progress: &mut corpus::engine::ProgressItem,
4949
_threads: Option<usize>,
5050
_should_interrupt: &AtomicBool,
5151
) -> anyhow::Result<()> {

src/plumbing/main.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,19 @@ pub fn main() -> Result<()> {
148148
move |progress, _out, _err| {
149149
let mut engine = core::corpus::Engine::open_or_create(
150150
db,
151-
env!("GITOXIDE_VERSION").into(),
152-
progress,
153-
trace,
154-
reverse_trace_lines,
151+
core::corpus::engine::State {
152+
gitoxide_version: env!("GITOXIDE_VERSION").into(),
153+
progress,
154+
trace_to_progress: trace,
155+
reverse_trace_lines,
156+
},
155157
)?;
156158
match cmd {
157-
crate::plumbing::options::corpus::SubCommands::Run => engine.run(path, thread_limit),
159+
crate::plumbing::options::corpus::SubCommands::Run {
160+
dry_run,
161+
repo_sql_suffix,
162+
include_task,
163+
} => engine.run(path, thread_limit, dry_run, repo_sql_suffix, include_task),
158164
crate::plumbing::options::corpus::SubCommands::Refresh => engine.refresh(path),
159165
}
160166
},

src/plumbing/options/mod.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,23 @@ pub mod corpus {
149149
#[derive(Debug, clap::Subcommand)]
150150
pub enum SubCommands {
151151
/// Perform a corpus run on all registered repositories.
152-
Run,
152+
Run {
153+
/// Don't run any task, but print all repos that would be traversed once.
154+
///
155+
/// Note that this will refresh repositories if necessary and store them in the database, it just won't run tasks.
156+
#[clap(long, short = 'n')]
157+
dry_run: bool,
158+
159+
/// The SQL that will be appended to the actual select statement for repositories to apply additional filtering, like `LIMIT 10`.
160+
///
161+
/// The string must be trusted even though the engine will only execute a single statement.
162+
#[clap(long, short = 'r')]
163+
repo_sql_suffix: Option<String>,
164+
165+
/// The short_names of the tasks to include when running.
166+
#[clap(long, short = 't')]
167+
include_task: Vec<String>,
168+
},
153169
/// Re-read all repositories under the corpus directory, and add or update them.
154170
Refresh,
155171
}

0 commit comments

Comments
 (0)