Skip to content

Commit 2b95219

Browse files
committed
Auto merge of #3592 - jtgeibel:squash-index, r=pietroalbini
Add a background job for squashing the index. This adds a background job that squashes the index into a single commit. The current plan is to manually enqueue this job on a 6 week schedule, roughly aligning with new `rustc` releases. Before deploying this, we will need to make sure that the SSH key is allowed to do a force push to the protected master branch. This job is derived from a [script] that was periodically run by the cargo team. Relative to the original script, the push of the snapshot branch is no longer forced. The job will fail if run more than once on the same day. (If the first attempt fails before pushing a new root commit upstream, then retries should succeed as long as the snapshot can be fast-forwarded.) [script]: rust-lang/crates-io-cargo-teams#47 (comment)
2 parents 759f108 + 68bf2e7 commit 2b95219

File tree

2 files changed

+104
-10
lines changed

2 files changed

+104
-10
lines changed

src/bin/enqueue-job.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![warn(clippy::all, rust_2018_idioms)]
22

33
use anyhow::{anyhow, Result};
4-
use cargo_registry::{db, env, tasks};
4+
use cargo_registry::{db, env, git, tasks};
55
use diesel::prelude::*;
66
use swirl::schema::background_jobs::dsl::*;
77
use swirl::Job;
@@ -36,6 +36,7 @@ fn main() -> Result<()> {
3636
Ok(tasks::dump_db(database_url, target_name).enqueue(&conn)?)
3737
}
3838
"daily_db_maintenance" => Ok(tasks::daily_db_maintenance().enqueue(&conn)?),
39+
"squash_index" => Ok(git::squash_index().enqueue(&conn)?),
3940
other => Err(anyhow!("Unrecognized job type `{}`", other)),
4041
}
4142
}

src/git.rs

Lines changed: 102 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
use std::collections::HashMap;
22
use std::fs::{self, OpenOptions};
3+
use std::io::prelude::*;
34
use std::path::{Path, PathBuf};
5+
6+
use chrono::Utc;
47
use swirl::PerformError;
5-
use tempfile::{Builder, TempDir};
8+
use tempfile::TempDir;
69
use url::Url;
710

811
use crate::background_jobs::Environment;
@@ -146,7 +149,7 @@ pub struct Repository {
146149

147150
impl Repository {
148151
pub fn open(repository_config: &RepositoryConfig) -> Result<Self, PerformError> {
149-
let checkout_path = Builder::new().prefix("git").tempdir()?;
152+
let checkout_path = tempfile::Builder::new().prefix("git").tempdir()?;
150153

151154
let repository = git2::build::RepoBuilder::new()
152155
.fetch_options(Self::fetch_options(&repository_config.credentials))
@@ -203,12 +206,11 @@ impl Repository {
203206
self.repository
204207
.commit(Some("HEAD"), &sig, &sig, msg, &tree, &[&parent])?;
205208

206-
self.push()
209+
self.push("refs/heads/master")
207210
}
208211

209-
/// Push the current branch to "refs/heads/master"
210-
fn push(&self) -> Result<(), PerformError> {
211-
let refname = "refs/heads/master";
212+
/// Push the current branch to the provided refspec
213+
fn push(&self, refspec: &str) -> Result<(), PerformError> {
212214
let mut ref_status = Ok(());
213215
let mut callback_called = false;
214216
{
@@ -217,8 +219,7 @@ impl Repository {
217219
callbacks.credentials(|_, user_from_url, cred_type| {
218220
self.credentials.git2_callback(user_from_url, cred_type)
219221
});
220-
callbacks.push_update_reference(|cb_refname, status| {
221-
assert_eq!(refname, cb_refname);
222+
callbacks.push_update_reference(|_, status| {
222223
if let Some(s) = status {
223224
ref_status = Err(format!("failed to push a ref: {}", s).into())
224225
}
@@ -227,7 +228,7 @@ impl Repository {
227228
});
228229
let mut opts = git2::PushOptions::new();
229230
opts.remote_callbacks(callbacks);
230-
origin.push(&[refname], Some(&mut opts))?;
231+
origin.push(&[refspec], Some(&mut opts))?;
231232
}
232233

233234
if !callback_called {
@@ -276,6 +277,24 @@ impl Repository {
276277
opts.remote_callbacks(callbacks);
277278
opts
278279
}
280+
281+
/// Reset `HEAD` to a single commit with all the index contents, but no parent
///
/// The new root commit reuses the tree of the commit `HEAD` currently points
/// at and uses `msg` as its commit message. The repository is then hard-reset
/// to that commit. Nothing is pushed by this method.
///
/// # Errors
///
/// Returns an error if looking up `HEAD`, creating the signature or commit,
/// or performing the reset fails.
282+
fn squash_to_single_commit(&self, msg: &str) -> Result<(), PerformError> {
283+
// Reuse the tree of the current `HEAD` so the file contents are unchanged.
let tree = self.repository.find_commit(self.head_oid()?)?.tree()?;
284+
let sig = self.repository.signature()?;
285+
286+
// We cannot pass an existing ref as `update_ref`, because that requires the
287+
// first parent of this commit to match the ref's current value.
288+
// Instead, create the commit and then do a hard reset.
289+
// `None`: do not update any ref while committing; `&[]`: no parent commits.
let commit = self.repository.commit(None, &sig, &sig, msg, &tree, &[])?;
290+
// `reset` takes an object rather than an id, so look the new commit up.
let commit = self
291+
.repository
292+
.find_object(commit, Some(git2::ObjectType::Commit))?;
293+
self.repository
294+
.reset(&commit, git2::ResetType::Hard, None)?;
295+
296+
Ok(())
297+
}
279298
}
280299

281300
#[swirl::background_job]
@@ -357,3 +376,77 @@ pub fn yank(
357376
Ok(())
358377
})
359378
}
379+
380+
/// Collapse the index into a single commit, archiving the current history in a snapshot branch.
///
/// The squashed commit is created in the local checkout and then published by
/// shelling out to `git push --atomic`. `master` is overwritten using
/// `--force-with-lease`, pinned to the `HEAD` recorded at the start of the
/// job, and that previous `HEAD` is pushed to a `snapshot-<YYYY-MM-DD>` branch
/// in the same push.
///
/// Only SSH credentials are supported. The key is written to a temporary file
/// whose path is handed to `ssh` through `GIT_SSH_COMMAND`.
///
/// # Errors
///
/// Returns an error if the index cannot be locked, the local squash fails,
/// the credentials are not an SSH key, the temporary key file cannot be
/// created or written, or the `git push` command cannot be run or exits
/// unsuccessfully (its stderr is included in the error message).
381+
#[swirl::background_job]
382+
pub fn squash_index(env: &Environment) -> Result<(), PerformError> {
383+
let repo = env.lock_index()?;
384+
println!("Squashing the index into a single commit.");
385+
386+
// UTC date stamp; used below in the commit message and the snapshot branch name.
let now = Utc::now().format("%Y-%m-%d");
387+
// Record `HEAD` before squashing: it becomes both the push-lease value and
// the tip of the snapshot branch.
let original_head = repo.head_oid()?.to_string();
388+
let msg = format!("Collapse index into one commit\n\n\
389+
390+
Previous HEAD was {}, now on the `snapshot-{}` branch\n\n\
391+
392+
More information about this change can be found [online] and on [this issue].\n\n\
393+
394+
[online]: https://internals.rust-lang.org/t/cargos-crate-index-upcoming-squash-into-one-commit/8440\n\
395+
[this issue]: https://github.com/rust-lang/crates-io-cargo-teams/issues/47", original_head, now);
396+
397+
repo.squash_to_single_commit(&msg)?;
398+
399+
// Shell out to git because libgit2 does not currently support push leases
400+
401+
// Only an SSH key can be passed to the `ssh -i` invocation below, so every
// other kind of credential is rejected up front.
let key = match &repo.credentials {
402+
Credentials::Ssh { key } => key,
403+
Credentials::Http { .. } => {
404+
return Err(String::from("squash_index: Password auth not supported").into())
405+
}
406+
_ => return Err(String::from("squash_index: Could not determine credentials").into()),
407+
};
408+
409+
// When running on production, ensure the file is created in tmpfs and not persisted to disk
410+
#[cfg(target_os = "linux")]
411+
let mut temp_key_file = tempfile::Builder::new().tempfile_in("/dev/shm")?;
412+
413+
// For other platforms, default to std::env::tempdir()
414+
#[cfg(not(target_os = "linux"))]
415+
let mut temp_key_file = tempfile::Builder::new().tempfile()?;
416+
417+
// `ssh -i` expects a path to an identity file, so the key has to be written out.
temp_key_file.write_all(key.as_bytes())?;
418+
419+
// Run git inside the existing checkout so that `origin` and `HEAD` refer to the index repository.
let checkout_path = repo.checkout_path.path();
420+
let output = std::process::Command::new("git")
421+
.current_dir(checkout_path)
422+
.env(
423+
"GIT_SSH_COMMAND",
424+
format!(
425+
// NOTE(review): `accept-new` presumably lets ssh trust a not-yet-known
// host key without prompting — confirm against ssh_config(5).
"ssh -o StrictHostKeyChecking=accept-new -i {}",
426+
temp_key_file.path().display()
427+
),
428+
)
429+
.args(&[
430+
"push",
431+
// Both updates should succeed or fail together
432+
"--atomic",
433+
"origin",
434+
// Overwrite master, but only if the server's master still matches the expected value
435+
&format!("--force-with-lease=refs/heads/master:{}", original_head),
436+
// The new squashed commit is pushed to master
437+
"HEAD:refs/heads/master",
438+
// The previous value of HEAD is pushed to a snapshot branch
439+
&format!("{}:refs/heads/snapshot-{}", original_head, now),
440+
])
441+
.output()?;
442+
443+
// A non-zero exit status is turned into a job error carrying git's stderr.
if !output.status.success() {
444+
let stderr = String::from_utf8_lossy(&output.stderr);
445+
let message = format!("Running git command failed with: {}", stderr);
446+
return Err(message.into());
447+
}
448+
449+
println!("The index has been successfully squashed.");
450+
451+
Ok(())
452+
}

0 commit comments

Comments
 (0)