Skip to content

Commit 7706d20

Browse files
committed
Create updates.xml RSS feed
This creates an RSS feed published at https://static.crates.io/rss/updates.xml. The feed is synced with the database in a background job after every successful publish of a new version. It includes the latest 100 published versions with the crate name, version number, crate description, URL and publish date.
1 parent 28c0408 commit 7706d20

File tree

9 files changed

+198
-1
lines changed

9 files changed

+198
-1
lines changed

Cargo.lock

Lines changed: 40 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ paste = "=1.0.15"
100100
prometheus = { version = "=0.13.4", default-features = false }
101101
rand = "=0.8.5"
102102
reqwest = { version = "=0.12.5", features = ["blocking", "gzip", "json"] }
103+
rss = { version = "=2.0.8", default-features = false, features = ["atom"] }
103104
scheduled-thread-pool = "=0.2.7"
104105
secrecy = "=0.8.0"
105106
semver = { version = "=1.0.23", features = ["serde"] }

src/admin/enqueue_job.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ pub enum Command {
3939
force: bool,
4040
},
4141
SendTokenExpiryNotifications,
42+
SyncUpdatesFeed,
4243
}
4344

4445
pub fn run(command: Command) -> Result<()> {
@@ -125,6 +126,9 @@ pub fn run(command: Command) -> Result<()> {
125126
Command::SendTokenExpiryNotifications => {
126127
jobs::SendTokenExpiryNotifications.enqueue(conn)?;
127128
}
129+
Command::SyncUpdatesFeed => {
130+
jobs::rss::SyncUpdatesFeed.enqueue(conn)?;
131+
}
128132
};
129133

130134
Ok(())

src/controllers/krate/publish.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,10 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
431431
CheckTyposquat::new(&krate.name).enqueue(conn)?;
432432
}
433433

434+
if let Err(error) = jobs::rss::SyncUpdatesFeed.enqueue(conn) {
435+
error!("Failed to enqueue `rss::SyncUpdatesFeed` job: {error}");
436+
}
437+
434438
// The `other` field on `PublishWarnings` was introduced to handle a temporary warning
435439
// that is no longer needed. As such, crates.io currently does not return any `other`
436440
// warnings at this time, but if we need to, the field is available.

src/storage.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ use object_store::local::LocalFileSystem;
77
use object_store::memory::InMemory;
88
use object_store::path::Path;
99
use object_store::prefix::PrefixStore;
10-
use object_store::{Attribute, Attributes, ClientOptions, ObjectStore, Result};
10+
use object_store::{Attribute, Attributes, ClientOptions, ObjectStore, PutPayload, Result};
1111
use secrecy::{ExposeSecret, SecretString};
1212
use std::fs;
13+
use std::io::Cursor;
1314
use std::path::PathBuf;
1415
use std::sync::Arc;
1516
use tokio::fs::File;
@@ -203,6 +204,11 @@ impl Storage {
203204
apply_cdn_prefix(&self.cdn_prefix, &readme_path(name, version)).replace('+', "%2B")
204205
}
205206

207+
/// Returns the URL of an uploaded RSS feed.
///
/// The feed's storage path (converted from `feed_id`) is combined with the
/// configured `cdn_prefix` by `apply_cdn_prefix`, and every `+` in the
/// resulting string is replaced with `%2B`.
208+
pub fn feed_url(&self, feed_id: &FeedId) -> String {
209+
apply_cdn_prefix(&self.cdn_prefix, &feed_id.into()).replace('+', "%2B")
210+
}
211+
206212
#[instrument(skip(self))]
207213
pub async fn delete_all_crate_files(&self, name: &str) -> Result<()> {
208214
let prefix = format!("{PREFIX_CRATES}/{name}").into();
@@ -251,6 +257,25 @@ impl Storage {
251257
Ok(())
252258
}
253259

260+
/// Serializes `channel` to XML and uploads it to the storage path that
/// `feed_id` converts to.
///
/// # Errors
///
/// Returns an error if writing the channel into the in-memory buffer fails
/// or if the `put_opts` call on the underlying store fails.
#[instrument(skip(self, channel))]
261+
pub async fn upload_feed(
262+
&self,
263+
feed_id: &FeedId,
264+
channel: &rss::Channel,
265+
) -> anyhow::Result<()> {
266+
let path = feed_id.into();
267+
268+
// Render the feed into an in-memory buffer; `b' '` and `4` are the
// indentation character and width passed to the pretty-printer.
let mut buffer = Vec::new();
269+
let mut cursor = Cursor::new(&mut buffer);
270+
channel.pretty_write_to(&mut cursor, b' ', 4)?;
271+
let payload = PutPayload::from_bytes(buffer.into());
272+
273+
// Upload with an explicit XML content type in the object's attributes.
let attributes = self.attrs([(Attribute::ContentType, "text/xml; charset=UTF-8")]);
274+
let opts = attributes.into();
275+
self.store.put_opts(&path, payload, opts).await?;
276+
Ok(())
277+
}
278+
254279
#[instrument(skip(self, content))]
255280
pub async fn sync_index(&self, name: &str, content: Option<String>) -> Result<()> {
256281
let path = crates_io_index::Repository::relative_index_file_for_url(name).into();
@@ -349,6 +374,19 @@ fn apply_cdn_prefix(cdn_prefix: &Option<String>, path: &Path) -> String {
349374
}
350375
}
351376

377+
/// Identifies an RSS feed handled by `Storage::feed_url` and
/// `Storage::upload_feed`.
#[derive(Debug)]
378+
pub enum FeedId {
379+
/// The updates feed, stored at `rss/updates.xml`.
Updates,
380+
}
381+
382+
/// Maps a feed identifier to the storage path of its XML file.
impl From<&FeedId> for Path {
383+
fn from(feed_id: &FeedId) -> Path {
384+
// No catch-all arm, so a new `FeedId` variant must be given a path here.
match feed_id {
385+
FeedId::Updates => "rss/updates.xml".into(),
386+
}
387+
}
388+
}
389+
352390
#[cfg(test)]
353391
mod tests {
354392
use super::*;

src/worker/jobs/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pub mod dump_db;
1212
mod expiry_notification;
1313
mod git;
1414
mod readmes;
15+
pub mod rss;
1516
mod sync_admins;
1617
mod typosquat;
1718
mod update_default_version;

src/worker/jobs/rss/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
mod sync_updates_feed;
2+
3+
pub use sync_updates_feed::SyncUpdatesFeed;
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
use crate::schema::{crates, versions};
2+
use crate::storage::FeedId;
3+
use crate::worker::Environment;
4+
use anyhow::anyhow;
5+
use crates_io_worker::BackgroundJob;
6+
use diesel::prelude::*;
7+
use std::sync::Arc;
8+
9+
/// Background job that rebuilds the "updates" RSS feed from the database
/// and uploads it to storage.
///
/// The job carries no data of its own.
#[derive(Serialize, Deserialize)]
10+
pub struct SyncUpdatesFeed;
11+
12+
/// Maximum number of version updates loaded from the database for the feed
/// (used as the query `limit` in `load_version_updates`).
const NUM_ITEMS: i64 = 100;
13+
14+
impl BackgroundJob for SyncUpdatesFeed {
15+
const JOB_NAME: &'static str = "sync_updates_feed";
16+
17+
type Context = Arc<Environment>;
18+
19+
/// Regenerates the updates feed and publishes it.
///
/// Steps: load the latest version updates from the database, build an
/// `rss::Channel` from them, upload it through `ctx.storage`, and, when
/// `ctx.cloudfront()` returns a client, invalidate the feed's path there.
///
/// # Errors
///
/// Returns an error if obtaining a database connection, running the
/// query, uploading the feed, or the CloudFront invalidation fails.
async fn run(&self, ctx: Self::Context) -> anyhow::Result<()> {
20+
let feed_id = FeedId::Updates;
21+
22+
info!("Loading latest {NUM_ITEMS} version updates from the database…");
23+
let conn = ctx.deadpool.get().await?;
24+
// The first `?` propagates a failure of `interact` itself (converted to
// `anyhow` via its string form); the second propagates the query result
// returned by `load_version_updates`.
let version_updates = conn
25+
.interact(load_version_updates)
26+
.await
27+
.map_err(|err| anyhow!(err.to_string()))??;
28+
29+
// Atom `rel="self"` link whose href is the feed's own URL.
let link = rss::extension::atom::Link {
30+
href: ctx.storage.feed_url(&feed_id),
31+
rel: "self".to_string(),
32+
mime_type: Some("application/rss+xml".to_string()),
33+
..Default::default()
34+
};
35+
36+
// Each loaded `VersionUpdate` becomes one `rss::Item` via its `From` impl.
let channel = rss::Channel {
37+
title: "crates.io recent updates".to_string(),
38+
link: "https://crates.io/".to_string(),
39+
description: "Recent version publishes on the crates.io package repository".to_string(),
40+
language: Some("en".to_string()),
41+
atom_ext: Some(rss::extension::atom::AtomExtension { links: vec![link] }),
42+
items: version_updates.into_iter().map(Into::into).collect(),
43+
..Default::default()
44+
};
45+
46+
info!("Uploading feed to storage…");
47+
ctx.storage.upload_feed(&feed_id, &channel).await?;
48+
49+
// Cache invalidation only happens when a CloudFront client is available.
if let Some(cloudfront) = ctx.cloudfront() {
50+
let path = object_store::path::Path::from(&feed_id);
51+
52+
info!(%path, "Invalidating CloudFront cache…");
53+
cloudfront.invalidate(path.as_ref()).await?;
54+
} else {
55+
info!("Skipping CloudFront cache invalidation (CloudFront not configured)");
56+
}
57+
58+
info!("Finished syncing updates feed");
59+
Ok(())
60+
}
61+
}
62+
63+
/// Loads up to `NUM_ITEMS` versions, joined with their crates, ordered by
/// `versions::created_at` descending (most recently created first).
///
/// Returns the query error unchanged if loading fails.
fn load_version_updates(conn: &mut PgConnection) -> QueryResult<Vec<VersionUpdate>> {
64+
versions::table
65+
.inner_join(crates::table)
66+
.order(versions::created_at.desc())
67+
.select(VersionUpdate::as_select())
68+
.limit(NUM_ITEMS)
69+
.load(conn)
70+
}
71+
72+
/// One row of the versions/crates join used to build a feed item.
///
/// Each field names its source column explicitly because the columns come
/// from two different tables.
#[derive(Debug, Queryable, Selectable)]
73+
#[diesel(check_for_backend(diesel::pg::Pg))]
74+
struct VersionUpdate {
75+
/// Crate name (`crates.name`).
#[diesel(select_expression = crates::columns::name)]
76+
name: String,
77+
/// Version number (`versions.num`).
#[diesel(select_expression = versions::columns::num)]
78+
version: String,
79+
/// Crate description (`crates.description`), if any.
#[diesel(select_expression = crates::columns::description)]
80+
description: Option<String>,
81+
/// Creation timestamp of the version (`versions.created_at`).
#[diesel(select_expression = versions::columns::created_at)]
82+
time: chrono::NaiveDateTime,
83+
}
84+
85+
/// Converts a loaded version update into an RSS item.
impl From<VersionUpdate> for rss::Item {
86+
/// Builds an item titled `"<name> v<version>"` that links to
/// `https://crates.io/crates/<name>/<version>`, uses that same URL as a
/// permalink GUID, and carries the crate description and publish date.
fn from(u: VersionUpdate) -> Self {
87+
let title = format!("{} v{}", u.name, u.version);
88+
let link = format!("https://crates.io/crates/{}/{}", u.name, u.version);
89+
// The naive timestamp is interpreted as UTC before being formatted
// with `to_rfc2822`.
let pub_date = u.time.and_utc().to_rfc2822();
90+
91+
let guid = rss::Guid {
92+
value: link.clone(),
93+
permalink: true,
94+
};
95+
96+
rss::Item {
97+
guid: Some(guid),
98+
title: Some(title),
99+
link: Some(link),
100+
description: u.description,
101+
pub_date: Some(pub_date),
102+
..Default::default()
103+
}
104+
}
105+
}

src/worker/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,6 @@ impl RunnerExt for Runner<Arc<Environment>> {
3535
.register_job_type::<jobs::UpdateDownloads>()
3636
.register_job_type::<jobs::UpdateDefaultVersion>()
3737
.register_job_type::<jobs::SendTokenExpiryNotifications>()
38+
.register_job_type::<jobs::rss::SyncUpdatesFeed>()
3839
}
3940
}

0 commit comments

Comments
 (0)