Skip to content

Make recent downloads fast #1363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Remove everything associated with `recent_crate_downloads`: the
-- `refresh_recent_crate_downloads()` helper function, the index on the
-- view, and finally the materialized view itself.
DROP FUNCTION refresh_recent_crate_downloads();
DROP INDEX recent_crate_downloads_crate_id;
DROP MATERIALIZED VIEW recent_crate_downloads;
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Materialized view holding, for each crate, the sum of its
-- `crate_downloads.downloads` rows dated strictly after the calendar date
-- 90 days before the moment the view is (re)computed.
CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS
SELECT crate_id, SUM(downloads) FROM crate_downloads
WHERE date > date(CURRENT_TIMESTAMP - INTERVAL '90 days')
GROUP BY crate_id;
-- One row per crate, so `crate_id` is unique. PostgreSQL requires a unique
-- index on a materialized view before it can be refreshed with
-- `CONCURRENTLY`, which the function below relies on.
CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id);

-- Helper that recomputes the view's contents. Wrapping the refresh in a
-- function lets callers trigger it with
-- `SELECT refresh_recent_crate_downloads()`.
CREATE FUNCTION refresh_recent_crate_downloads() RETURNS VOID AS $$
REFRESH MATERIALIZED VIEW CONCURRENTLY recent_crate_downloads;
$$ LANGUAGE SQL;
5 changes: 5 additions & 0 deletions src/bin/update-downloads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

extern crate cargo_registry;
extern crate chrono;
#[macro_use]
extern crate diesel;

use diesel::prelude::*;
Expand Down Expand Up @@ -29,6 +30,7 @@ fn main() {
}

fn update(conn: &PgConnection) -> QueryResult<()> {
use diesel::select;
use version_downloads::dsl::*;

let mut max = Some(0);
Expand All @@ -42,6 +44,9 @@ fn update(conn: &PgConnection) -> QueryResult<()> {
collect(conn, &rows)?;
max = rows.last().map(|d| d.id);
}

no_arg_sql_function!(refresh_recent_crate_downloads, ());
select(refresh_recent_crate_downloads).execute(conn)?;
Ok(())
}

Expand Down
24 changes: 6 additions & 18 deletions src/controllers/krate/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use models::krate::ALL_COLUMNS;

/// Handles the `GET /summary` route.
pub fn summary(req: &mut Request) -> CargoResult<Response> {
use diesel::sql_query;
use schema::crates::dsl::*;

let conn = req.db_conn()?;
Expand Down Expand Up @@ -54,23 +53,12 @@ pub fn summary(req: &mut Request) -> CargoResult<Response> {
.limit(10)
.load(&*conn)?;

// This query needs to be structured in this way to have the LIMIT
// happen before the joining/sorting for performance reasons.
// It needs to use sql_query because Diesel doesn't have a great way
// to join on subselects right now :(
let most_recently_downloaded = sql_query(
"SELECT crates.* \
FROM crates \
JOIN ( \
SELECT crate_downloads.crate_id, SUM(crate_downloads.downloads) \
FROM crate_downloads \
WHERE crate_downloads.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days') \
GROUP BY crate_downloads.crate_id \
ORDER BY SUM(crate_downloads.downloads) DESC NULLS LAST \
LIMIT 10 \
) cd ON crates.id = cd.crate_id \
ORDER BY cd.sum DESC NULLS LAST",
).load::<Crate>(&*conn)?;
let most_recently_downloaded = crates
.inner_join(recent_crate_downloads::table)
.order(recent_crate_downloads::downloads.desc())
.select(ALL_COLUMNS)
.limit(10)
.load(&*conn)?;

let popular_keywords = keywords::table
.order(keywords::crates_cnt.desc())
Expand Down
43 changes: 16 additions & 27 deletions src/controllers/krate/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ use models::krate::{canon_crate_name, ALL_COLUMNS};
/// function out to cover the different use cases, and create unit tests
/// for them.
pub fn search(req: &mut Request) -> CargoResult<Response> {
use diesel::dsl::*;
use diesel::sql_types::{BigInt, Bool, Nullable};
use diesel::sql_types::Bool;

let conn = req.db_conn()?;
let (offset, limit) = req.pagination(10, 100)?;
Expand All @@ -43,30 +42,15 @@ pub fn search(req: &mut Request) -> CargoResult<Response> {
.map(|s| &**s)
.unwrap_or("recent-downloads");

let recent_downloads = sql::<Nullable<BigInt>>("SUM(crate_downloads.downloads)");

let mut query = crates::table
.left_join(
crate_downloads::table.on(crates::id
.eq(crate_downloads::crate_id)
.and(crate_downloads::date.gt(date(now - 90.days())))),
)
.group_by(crates::id)
.left_join(recent_crate_downloads::table)
.select((
ALL_COLUMNS,
false.into_sql::<Bool>(),
recent_downloads.clone(),
recent_crate_downloads::downloads.nullable(),
))
.into_boxed();

if sort == "downloads" {
query = query.order(crates::downloads.desc())
} else if sort == "recent-downloads" {
query = query.order(recent_downloads.clone().desc().nulls_last())
} else {
query = query.order(crates::name.asc())
}

if let Some(q_string) = params.get("q") {
let sort = params.get("sort").map(|s| &**s).unwrap_or("relevance");
let q = plainto_tsquery(q_string);
Expand All @@ -78,16 +62,13 @@ pub fn search(req: &mut Request) -> CargoResult<Response> {
query = query.select((
ALL_COLUMNS,
Crate::with_name(q_string),
recent_downloads.clone(),
recent_crate_downloads::downloads.nullable(),
));
let perfect_match = Crate::with_name(q_string).desc();
if sort == "downloads" {
query = query.order((perfect_match, crates::downloads.desc()));
} else if sort == "recent-downloads" {
query = query.order((perfect_match, recent_downloads.clone().desc().nulls_last()));
} else {
query = query.order(Crate::with_name(q_string).desc());

if sort == "relevance" {
let rank = ts_rank_cd(crates::textsearchable_index_col, q);
query = query.order((perfect_match, rank.desc()))
query = query.then_order_by(rank.desc())
}
}

Expand Down Expand Up @@ -156,6 +137,14 @@ pub fn search(req: &mut Request) -> CargoResult<Response> {
);
}

if sort == "downloads" {
query = query.then_order_by(crates::downloads.desc())
} else if sort == "recent-downloads" {
query = query.then_order_by(recent_crate_downloads::downloads.desc().nulls_last())
} else {
query = query.then_order_by(crates::name.asc())
}

// The database query returns a tuple within a tuple, with the root
// tuple containing 3 items.
let data = query
Expand Down
20 changes: 20 additions & 0 deletions src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,24 @@ table! {
}
}

table! {
/// Representation of the `recent_crate_downloads` view.
///
/// This data represents the downloads in the last 90 days.
/// This view does not contain realtime data.
/// It is refreshed by the `update-downloads` script.
recent_crate_downloads (crate_id) {
/// The `crate_id` column of the `recent_crate_downloads` view.
///
/// Its SQL type is `Integer`.
crate_id -> Integer,
/// The `downloads` column of the `recent_crate_downloads` view.
///
/// Its SQL type is `BigInt`.
downloads -> BigInt,
}
}

table! {
use diesel::sql_types::*;
use diesel_full_text_search::{TsVector as Tsvector};
Expand Down Expand Up @@ -865,6 +883,7 @@ joinable!(emails -> users (user_id));
joinable!(follows -> crates (crate_id));
joinable!(follows -> users (user_id));
joinable!(readme_renderings -> versions (version_id));
joinable!(recent_crate_downloads -> crates (crate_id));
joinable!(version_authors -> users (user_id));
joinable!(version_authors -> versions (version_id));
joinable!(version_downloads -> versions (version_id));
Expand All @@ -886,6 +905,7 @@ allow_tables_to_appear_in_same_query!(
keywords,
metadata,
readme_renderings,
recent_crate_downloads,
reserved_crate_names,
teams,
users,
Expand Down
5 changes: 4 additions & 1 deletion src/tests/all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ impl<'a> CrateBuilder<'a> {
}

fn build(mut self, connection: &PgConnection) -> CargoResult<Crate> {
use diesel::{insert_into, update};
use diesel::{insert_into, select, update};

let mut krate = self.krate
.create_or_update(connection, None, self.owner_id)?;
Expand Down Expand Up @@ -444,6 +444,9 @@ impl<'a> CrateBuilder<'a> {
insert_into(crate_downloads::table)
.values(&crate_download)
.execute(connection)?;

no_arg_sql_function!(refresh_recent_crate_downloads, ());
select(refresh_recent_crate_downloads).execute(connection)?;
}

if self.versions.is_empty() {
Expand Down