@@ -6,6 +6,7 @@ use crates_io_worker::BackgroundJob;
6
6
use std:: fs:: { self , File } ;
7
7
use std:: path:: { Path , PathBuf } ;
8
8
use std:: sync:: Arc ;
9
+ use zip:: write:: SimpleFileOptions ;
9
10
10
11
#[ derive( Clone , Serialize , Deserialize ) ]
11
12
pub struct DumpDb {
@@ -28,38 +29,56 @@ impl BackgroundJob for DumpDb {
28
29
/// Create CSV dumps of the public information in the database, wrap them in a
29
30
/// tarball and a zip file, and upload both to S3.
30
31
async fn run ( & self , env : Self :: Context ) -> anyhow:: Result < ( ) > {
31
- let target_name = "db-dump.tar.gz" ;
32
+ const TAR_PATH : & str = "db-dump.tar.gz" ;
33
+ const ZIP_PATH : & str = "db-dump.zip" ;
34
+
32
35
let database_url = self . database_url . clone ( ) ;
33
36
34
- let tarball = spawn_blocking ( move || {
37
+ let ( tarball, zip ) = spawn_blocking ( move || {
35
38
let directory = DumpDirectory :: create ( ) ?;
36
39
37
- info ! ( "Begin exporting database" ) ;
40
+ info ! ( "Exporting database… " ) ;
38
41
directory. populate ( & database_url) ?;
39
42
40
43
let export_dir = directory. path ( ) ;
41
- info ! ( path = ?export_dir, "Creating tarball" ) ;
42
- let prefix = PathBuf :: from ( directory. timestamp . format ( "%Y-%m-%d-%H%M%S" ) . to_string ( ) ) ;
43
- create_tarball ( export_dir, & prefix)
44
+ info ! ( path = ?export_dir, "Creating tarball…" ) ;
45
+ let tarball_prefix =
46
+ PathBuf :: from ( directory. timestamp . format ( "%Y-%m-%d-%H%M%S" ) . to_string ( ) ) ;
47
+ create_archives ( export_dir, & tarball_prefix)
44
48
} )
45
49
. await ?;
46
50
47
- info ! ( "Uploading tarball" ) ;
48
- env. storage
49
- . upload_db_dump ( target_name, tarball. path ( ) )
50
- . await ?;
51
+ info ! ( "Uploading tarball…" ) ;
52
+ env. storage . upload_db_dump ( TAR_PATH , tarball. path ( ) ) . await ?;
51
53
info ! ( "Database dump tarball uploaded" ) ;
52
54
53
- info ! ( "Invalidating CDN caches" ) ;
55
+ info ! ( "Invalidating CDN caches…" ) ;
56
+ if let Some ( cloudfront) = env. cloudfront ( ) {
57
+ if let Err ( error) = cloudfront. invalidate ( TAR_PATH ) . await {
58
+ warn ! ( "Failed to invalidate CloudFront cache: {}" , error) ;
59
+ }
60
+ }
61
+
62
+ if let Some ( fastly) = env. fastly ( ) {
63
+ if let Err ( error) = fastly. invalidate ( TAR_PATH ) . await {
64
+ warn ! ( "Failed to invalidate Fastly cache: {}" , error) ;
65
+ }
66
+ }
67
+
68
+ info ! ( "Uploading zip file…" ) ;
69
+ env. storage . upload_db_dump ( ZIP_PATH , zip. path ( ) ) . await ?;
70
+ info ! ( "Database dump zip file uploaded" ) ;
71
+
72
+ info ! ( "Invalidating CDN caches…" ) ;
54
73
if let Some ( cloudfront) = env. cloudfront ( ) {
55
- if let Err ( error) = cloudfront. invalidate ( target_name ) . await {
56
- warn ! ( "failed to invalidate CloudFront cache: {}" , error) ;
74
+ if let Err ( error) = cloudfront. invalidate ( ZIP_PATH ) . await {
75
+ warn ! ( "Failed to invalidate CloudFront cache: {}" , error) ;
57
76
}
58
77
}
59
78
60
79
if let Some ( fastly) = env. fastly ( ) {
61
- if let Err ( error) = fastly. invalidate ( target_name ) . await {
62
- warn ! ( "failed to invalidate Fastly cache: {}" , error) ;
80
+ if let Err ( error) = fastly. invalidate ( ZIP_PATH ) . await {
81
+ warn ! ( "Failed to invalidate Fastly cache: {}" , error) ;
63
82
}
64
83
}
65
84
@@ -202,15 +221,22 @@ pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> {
202
221
Ok ( ( ) )
203
222
}
204
223
205
- fn create_tarball ( export_dir : & Path , prefix : & Path ) -> anyhow:: Result < tempfile:: NamedTempFile > {
206
- debug ! ( "Creating tarball file" ) ;
207
- let tempfile = tempfile:: NamedTempFile :: new ( ) ?;
208
- let encoder = flate2:: write:: GzEncoder :: new ( tempfile. as_file ( ) , flate2:: Compression :: default ( ) ) ;
224
+ fn create_archives (
225
+ export_dir : & Path ,
226
+ tarball_prefix : & Path ,
227
+ ) -> anyhow:: Result < ( tempfile:: NamedTempFile , tempfile:: NamedTempFile ) > {
228
+ debug ! ( "Creating tarball file…" ) ;
229
+ let tar_tempfile = tempfile:: NamedTempFile :: new ( ) ?;
230
+ let encoder =
231
+ flate2:: write:: GzEncoder :: new ( tar_tempfile. as_file ( ) , flate2:: Compression :: default ( ) ) ;
232
+ let mut tar = tar:: Builder :: new ( encoder) ;
209
233
210
- let mut archive = tar:: Builder :: new ( encoder) ;
234
+ debug ! ( "Creating zip file…" ) ;
235
+ let zip_tempfile = tempfile:: NamedTempFile :: new ( ) ?;
236
+ let mut zip = zip:: ZipWriter :: new ( zip_tempfile. as_file ( ) ) ;
211
237
212
- debug ! ( path = ?prefix , "Appending directory to tarball" ) ;
213
- archive . append_dir ( prefix , export_dir) ?;
238
+ debug ! ( "Appending `{tarball_prefix:?}` directory to tarball… " ) ;
239
+ tar . append_dir ( tarball_prefix , export_dir) ?;
214
240
215
241
// Append readme, metadata, schemas.
216
242
let mut paths = Vec :: new ( ) ;
@@ -224,9 +250,13 @@ fn create_tarball(export_dir: &Path, prefix: &Path) -> anyhow::Result<tempfile::
224
250
// Sort paths to make the tarball and the zip file deterministic.
225
251
paths. sort ( ) ;
226
252
for ( path, file_name) in paths {
227
- let name_in_tar = prefix. join ( file_name) ;
228
- debug ! ( name = ?name_in_tar, "Appending file to tarball" ) ;
229
- archive. append_path_with_name ( path, name_in_tar) ?;
253
+ let name = tarball_prefix. join ( & file_name) ;
254
+ debug ! ( "Appending `{name:?}` file to tarball…" ) ;
255
+ tar. append_path_with_name ( & path, name) ?;
256
+
257
+ debug ! ( "Appending `{file_name:?}` file to zip file…" ) ;
258
+ zip. start_file_from_path ( & file_name, SimpleFileOptions :: default ( ) ) ?;
259
+ std:: io:: copy ( & mut File :: open ( path) ?, & mut zip) ?;
230
260
}
231
261
232
262
// Append topologically sorted tables to make it possible to pipeline
@@ -236,21 +266,34 @@ fn create_tarball(export_dir: &Path, prefix: &Path) -> anyhow::Result<tempfile::
236
266
let visibility_config = VisibilityConfig :: get ( ) ;
237
267
let sorted_tables = visibility_config. topological_sort ( ) ;
238
268
239
- let path = prefix. join ( "data" ) ;
240
- debug ! ( ?path, "Appending directory to tarball" ) ;
241
- archive. append_dir ( path, export_dir. join ( "data" ) ) ?;
269
+ let path = tarball_prefix. join ( "data" ) ;
270
+ debug ! ( "Appending `data` directory to tarball…" ) ;
271
+ tar. append_dir ( path, export_dir. join ( "data" ) ) ?;
272
+
273
+ debug ! ( "Appending `data` directory to zip file…" ) ;
274
+ zip. add_directory ( "data" , SimpleFileOptions :: default ( ) ) ?;
275
+
242
276
for table in sorted_tables {
243
277
let csv_path = export_dir. join ( "data" ) . join ( table) . with_extension ( "csv" ) ;
244
278
if csv_path. exists ( ) {
245
- let name_in_tar = prefix. join ( "data" ) . join ( table) . with_extension ( "csv" ) ;
246
- debug ! ( name = ?name_in_tar, "Appending file to tarball" ) ;
247
- archive. append_path_with_name ( csv_path, name_in_tar) ?;
279
+ let name = tarball_prefix
280
+ . join ( "data" )
281
+ . join ( table)
282
+ . with_extension ( "csv" ) ;
283
+ debug ! ( "Appending `{name:?}` file to tarball…" ) ;
284
+ tar. append_path_with_name ( & csv_path, name) ?;
285
+
286
+ let name = PathBuf :: from ( "data" ) . join ( table) . with_extension ( "csv" ) ;
287
+ debug ! ( "Appending `{name:?}` file to zip file…" ) ;
288
+ zip. start_file_from_path ( & name, SimpleFileOptions :: default ( ) ) ?;
289
+ std:: io:: copy ( & mut File :: open ( csv_path) ?, & mut zip) ?;
248
290
}
249
291
}
250
292
251
- drop ( archive) ;
293
+ drop ( tar) ;
294
+ zip. finish ( ) ?;
252
295
253
- Ok ( tempfile )
296
+ Ok ( ( tar_tempfile , zip_tempfile ) )
254
297
}
255
298
256
299
mod configuration;
@@ -261,6 +304,7 @@ mod tests {
261
304
use super :: * ;
262
305
use flate2:: read:: GzDecoder ;
263
306
use insta:: assert_debug_snapshot;
307
+ use std:: io:: BufReader ;
264
308
use tar:: Archive ;
265
309
266
310
#[ test]
@@ -277,7 +321,7 @@ mod tests {
277
321
fs:: write ( p. join ( "data" ) . join ( "crate_owners.csv" ) , "" ) . unwrap ( ) ;
278
322
fs:: write ( p. join ( "data" ) . join ( "users.csv" ) , "" ) . unwrap ( ) ;
279
323
280
- let tarball = create_tarball ( p, & PathBuf :: from ( "0000-00-00" ) ) . unwrap ( ) ;
324
+ let ( tarball, zip ) = create_archives ( p, & PathBuf :: from ( "0000-00-00" ) ) . unwrap ( ) ;
281
325
let gz = GzDecoder :: new ( File :: open ( tarball. path ( ) ) . unwrap ( ) ) ;
282
326
let mut tar = Archive :: new ( gz) ;
283
327
@@ -296,5 +340,20 @@ mod tests {
296
340
"0000-00-00/data/crate_owners.csv",
297
341
]
298
342
"### ) ;
343
+
344
+ let file = File :: open ( zip. path ( ) ) . unwrap ( ) ;
345
+ let reader = BufReader :: new ( file) ;
346
+
347
+ let archive = zip:: ZipArchive :: new ( reader) . unwrap ( ) ;
348
+ let zip_paths = archive. file_names ( ) . collect :: < Vec < _ > > ( ) ;
349
+ assert_debug_snapshot ! ( zip_paths, @r###"
350
+ [
351
+ "README.md",
352
+ "data/",
353
+ "data/crates.csv",
354
+ "data/users.csv",
355
+ "data/crate_owners.csv",
356
+ ]
357
+ "### ) ;
299
358
}
300
359
}
0 commit comments