Skip to content

Commit aba755d

Browse files
committed
Optimize sqlite-to-postgres migration speed for large tables via temporary constraint suspension
1 parent 3b90189 commit aba755d

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed

database/src/bin/sqlite-to-postgres.rs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ trait Table {
3737
fn write_postgres_csv_row<W: Write>(writer: &mut csv::Writer<W>, row: &rusqlite::Row);
3838
}
3939

40+
/// Companion to `Table` for tables whose constraints are temporarily removed
/// while their rows are bulk-copied and then put back afterwards.
trait TableConstraint {
    /// Returns the table's constraint definitions as a single string in which
    /// every definition is terminated by `;`.
    ///
    /// Each definition begins with the constraint name; the whole definition
    /// is the text appended after `ALTER TABLE <table> ADD CONSTRAINT`, and
    /// its first token alone is used after `ALTER TABLE <table> DROP CONSTRAINT`.
    fn constraints() -> &'static str;
}
44+
4045
/// Field-less unit struct.
/// NOTE(review): presumably the marker type for the `artifact` table that the
/// foreign keys in this file reference — confirm against its `Table` impl.
struct Artifact;
4146

4247
#[derive(Serialize)]
@@ -282,6 +287,19 @@ impl Table for Pstat {
282287
}
283288
}
284289

290+
impl TableConstraint for Pstat {
    // The entries below follow what `\d` reports for this table in psql. Per
    // the original author's note, only the primary-key entry had to be tweaked
    // slightly so that it is valid after `ALTER TABLE ... ADD CONSTRAINT`.
    fn constraints() -> &'static str {
        // One definition per line, each terminated by `;` and starting with
        // the quoted constraint name.
        const PSTAT_CONSTRAINTS: &str = r#"
        "pstat_pkey" PRIMARY KEY (series, aid, cid);
        "pstat_aid_fkey" FOREIGN KEY (aid) REFERENCES artifact(id) ON UPDATE CASCADE ON DELETE CASCADE;
        "pstat_cid_fkey" FOREIGN KEY (cid) REFERENCES collection(id) ON UPDATE CASCADE ON DELETE CASCADE;
        "pstat_series_fkey" FOREIGN KEY (series) REFERENCES pstat_series(id) ON UPDATE CASCADE ON DELETE CASCADE;
        "#;
        PSTAT_CONSTRAINTS
    }
}
302+
285303
/// Field-less unit struct used only as a type parameter: `main` calls
/// `copy::<PstatSeries>`, and `copy` requires `T: Table`.
struct PstatSeries;
286304

287305
#[derive(Serialize)]
@@ -410,6 +428,19 @@ impl Table for RawSelfProfile {
410428
}
411429
}
412430

431+
impl TableConstraint for RawSelfProfile {
    // The entries below follow what `\d` reports for this table in psql. Per
    // the original author's note, only the primary-key entry had to be tweaked
    // slightly so that it is valid after `ALTER TABLE ... ADD CONSTRAINT`.
    fn constraints() -> &'static str {
        // One definition per line, each terminated by `;` and starting with
        // the quoted constraint name.
        const RAW_SELF_PROFILE_CONSTRAINTS: &str = r#"
        "raw_self_profile_pkey" PRIMARY KEY (aid, cid, crate, profile, cache);
        "raw_self_profile_aid_fkey" FOREIGN KEY (aid) REFERENCES artifact(id) ON UPDATE CASCADE ON DELETE CASCADE;
        "raw_self_profile_cid_fkey" FOREIGN KEY (cid) REFERENCES collection(id) ON UPDATE CASCADE ON DELETE CASCADE;
        "raw_self_profile_crate_fkey" FOREIGN KEY (crate) REFERENCES benchmark(name) ON UPDATE CASCADE ON DELETE CASCADE;
        "#;
        RAW_SELF_PROFILE_CONSTRAINTS
    }
}
443+
413444
/// Field-less unit struct used only as a type parameter: `main` calls
/// `copy::<RustcCompilation>`, and `copy` requires `T: Table`.
struct RustcCompilation;
414445

415446
#[derive(Serialize)]
@@ -562,9 +593,19 @@ async fn main() -> anyhow::Result<()> {
562593
copy::<CollectorProgress>(&sqlite_tx, &postgres_tx).await;
563594
copy::<Error>(&sqlite_tx, &postgres_tx).await;
564595
copy::<PstatSeries>(&sqlite_tx, &postgres_tx).await;
596+
597+
// Dropping constraints of large tables before copying data and then adding them back.
598+
// This approach should yield a significant speedup.
599+
drop_constraints::<Pstat>(&postgres_tx).await;
565600
copy::<Pstat>(&sqlite_tx, &postgres_tx).await;
601+
add_constraints::<Pstat>(&postgres_tx).await;
602+
566603
copy::<PullRequestBuild>(&sqlite_tx, &postgres_tx).await;
604+
605+
drop_constraints::<RawSelfProfile>(&postgres_tx).await;
567606
copy::<RawSelfProfile>(&sqlite_tx, &postgres_tx).await;
607+
add_constraints::<RawSelfProfile>(&postgres_tx).await;
608+
568609
copy::<RustcCompilation>(&sqlite_tx, &postgres_tx).await;
569610

570611
// This is overly paranoid, but don't commit the Postgres transaction until
@@ -698,3 +739,44 @@ async fn copy<T: Table>(
698739
/// Wraps `w` in a `csv::Writer` configured with `has_headers(false)`.
///
/// NOTE(review): the header row is presumably suppressed because the output
/// is fed to Postgres as raw data rows — confirm against the caller.
fn postgres_csv_writer<W: Write>(w: W) -> csv::Writer<W> {
    let mut builder = csv::WriterBuilder::new();
    builder.has_headers(false);
    builder.from_writer(w)
}
742+
743+
/// Drops every constraint listed by `T::constraints()` from the table
/// `T::name()`.
///
/// `T::constraints()` is split on `;`; each non-empty, trimmed piece is one
/// constraint definition whose first whitespace-delimited token is the
/// constraint name. For every definition, in listed order, this executes
/// `ALTER TABLE <table> DROP CONSTRAINT <name>` on `postgres`. The total time
/// taken is reported on stderr.
///
/// # Panics
///
/// Panics if any statement fails. The panic message contains the failing
/// statement together with the database error.
async fn drop_constraints<T: Table + TableConstraint>(postgres: &tokio_postgres::Transaction<'_>) {
    let table = T::name();

    let start = Instant::now();
    let definitions = T::constraints()
        .split(';')
        .map(str::trim)
        .filter(|definition| !definition.is_empty());
    for definition in definitions {
        let name = definition
            .split_whitespace()
            .next()
            .expect("a trimmed, non-empty definition has a first token");
        let statement = format!("ALTER TABLE {} DROP CONSTRAINT {}", table, name);
        postgres
            .execute(&statement, &[])
            .await
            // Name the statement in the panic so a failed migration shows
            // which constraint could not be dropped.
            .unwrap_or_else(|err| panic!("failed to execute `{}`: {:?}", statement, err));
    }
    let elapsed = start.elapsed();
    eprintln!("Drop constraints for table {} in {:?}", table, elapsed);
}
763+
764+
/// Re-creates every constraint listed by `T::constraints()` on the table
/// `T::name()`.
///
/// `T::constraints()` is split on `;`; each non-empty, trimmed piece is one
/// constraint definition. For every definition, in listed order, this executes
/// `ALTER TABLE <table> ADD CONSTRAINT <definition>` on `postgres`. The total
/// time taken is reported on stderr.
///
/// # Panics
///
/// Panics if any statement fails. The panic message contains the failing
/// statement together with the database error.
async fn add_constraints<T: Table + TableConstraint>(postgres: &tokio_postgres::Transaction<'_>) {
    let table = T::name();

    let start = Instant::now();
    let definitions = T::constraints()
        .split(';')
        .map(str::trim)
        .filter(|definition| !definition.is_empty());
    for definition in definitions {
        let statement = format!("ALTER TABLE {} ADD CONSTRAINT {}", table, definition);
        postgres
            .execute(&statement, &[])
            .await
            // Name the statement in the panic so a failed migration shows
            // which constraint could not be restored.
            .unwrap_or_else(|err| panic!("failed to execute `{}`: {:?}", statement, err));
    }
    let elapsed = start.elapsed();
    eprintln!("Add constraints for table {} in {:?}", table, elapsed);
}

0 commit comments

Comments
 (0)