Skip to content

Commit 14ccbba

Browse files
Merge #1312
1312: Switch most recently downloaded summary query to be faster r=jtgeibel Related to #1304. Because we only want to show the top 10 for /api/v1/summary, move the LIMIT within the query for the top downloads rather than joining on everything and then limiting. This should help alleviate at least some of the slowdowns we were seeing today with the /api/v1/summary call; there are other queries that are taking up some load that will need other mitigation strategies though :( Here's an explain analyze from the old query on a production dump from today: ``` explain analyze SELECT "crates"."id", "crates"."name", "crates"."updated_at", "crates"."created_at", "crates"."downloads", "crates"."description", "crates"."homepage", "crates"."documentation", "crates"."readme", "crates"."readme_file", "crates"."license", "crates"."repository", "crates"."max_upload_size" FROM ( "crates" LEFT OUTER JOIN "crate_downloads" ON "crates"."id" = "crate_downloads"."crate_id" AND "crate_downloads"."date" > date(CURRENT_TIMESTAMP - INTERVAL '90 DAYS') ) GROUP BY "crates"."id" ORDER BY SUM(crate_downloads.downloads) DESC NULLS LAST LIMIT 10; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Limit (cost=81546.14..81546.17 rows=10 width=858) (actual time=546.007..546.009 rows=10 loops=1) -> Sort (cost=81546.14..81583.65 rows=15004 width=858) (actual time=546.006..546.007 rows=10 loops=1) Sort Key: (sum(crate_downloads.downloads)) DESC NULLS LAST Sort Method: top-N heapsort Memory: 37kB -> GroupAggregate (cost=65223.31..81221.91 rows=15004 width=858) (actual time=273.398..539.672 rows=15004 loops=1) Group Key: crates.id -> Merge Left Join (cost=65223.31..78880.53 rows=438269 width=854) (actual time=273.331..485.878 rows=435230 loops=1) Merge Cond: (crates.id = crate_downloads.crate_id) -> Index Scan using packages_pkey on crates (cost=0.29..5950.28 rows=15004 width=850) (actual time=0.009..9.989 rows=15004 loops=1) -> Materialize (cost=65223.03..67414.37 rows=438269 width=8) (actual time=273.317..386.191 rows=435229 loops=1) -> Sort (cost=65223.03..66318.70 rows=438269 width=8) (actual time=273.314..332.477 rows=435229 loops=1) Sort Key: crate_downloads.crate_id Sort Method: external merge Disk: 7712kB -> Index Scan using index_crate_downloads_date on crate_downloads (cost=0.44..18162.03 rows=438269 width=8) (actual time=0.017..112.539 rows=435229 loops=1) Index Cond: (date > date((CURRENT_TIMESTAMP - '90 days'::interval))) Planning time: 0.502 ms Execution time: 605.679 ms (17 rows) ``` And the new query: ``` explain analyze SELECT crates.* FROM crates JOIN ( SELECT crate_downloads.crate_id, SUM(crate_downloads.downloads) FROM crate_downloads WHERE crate_downloads.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days') GROUP BY crate_downloads.crate_id ORDER BY SUM(crate_downloads.downloads) DESC NULLS LAST LIMIT 10 ) cd ON crates.id = cd.crate_id production_crates_io_explain-# ORDER BY cd.sum DESC NULLS LAST; QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop (cost=20323.24..20402.11 rows=10 width=1057) (actual time=180.585..180.682 rows=10 loops=1) -> Limit (cost=20322.96..20322.98 rows=10 width=12) (actual time=180.537..180.539 rows=10 loops=1) -> Sort (cost=20322.96..20350.82 rows=11146 width=12) (actual time=180.536..180.537 rows=10 loops=1) Sort Key: (sum(crate_downloads.downloads)) DESC NULLS LAST Sort Method: top-N heapsort Memory: 25kB -> Finalize HashAggregate (cost=19970.64..20082.10 rows=11146 width=12) (actual time=176.709..178.856 rows=15003 loops=1) Group Key: crate_downloads.crate_id -> Gather (cost=17518.52..19859.18 rows=22292 width=12) (actual time=150.947..163.257 rows=44654 loops=1) Workers Planned: 2 Workers Launched: 2 -> Partial HashAggregate (cost=16518.52..16629.98 rows=11146 width=12) (actual time=146.173..149.575 rows=14885 loops=3) Group Key: crate_downloads.crate_id -> Parallel Index Scan using index_crate_downloads_date on crate_downloads (cost=0.44..15605.46 rows=182612 width=8) (actual time=0.045..86.019 rows=145076 loops=3) Index Cond: (date > date((CURRENT_TIMESTAMP - '90 days'::interval))) -> Index Scan using packages_pkey on crates (cost=0.29..7.90 rows=1 width=1049) (actual time=0.012..0.012 rows=1 loops=10) Index Cond: (id = crate_downloads.crate_id) Planning time: 1.253 ms Execution time: 181.550 ms (18 rows) ```
2 parents a0f370a + 86cd1d8 commit 14ccbba

File tree

2 files changed

+20
-14
lines changed

2 files changed

+20
-14
lines changed

src/controllers/krate/metadata.rs

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ use models::krate::ALL_COLUMNS;
1414

1515
/// Handles the `GET /summary` route.
1616
pub fn summary(req: &mut Request) -> CargoResult<Response> {
17-
use diesel::dsl::*;
18-
use diesel::sql_types::{BigInt, Nullable};
17+
use diesel::sql_query;
1918
use schema::crates::dsl::*;
2019

2120
let conn = req.db_conn()?;
@@ -55,17 +54,23 @@ pub fn summary(req: &mut Request) -> CargoResult<Response> {
5554
.limit(10)
5655
.load(&*conn)?;
5756

58-
let recent_downloads = sql::<Nullable<BigInt>>("SUM(crate_downloads.downloads)");
59-
let most_recently_downloaded = crates
60-
.left_join(
61-
crate_downloads::table.on(id.eq(crate_downloads::crate_id)
62-
.and(crate_downloads::date.gt(date(now - 90.days())))),
63-
)
64-
.group_by(id)
65-
.order(recent_downloads.desc().nulls_last())
66-
.limit(10)
67-
.select(ALL_COLUMNS)
68-
.load::<Crate>(&*conn)?;
57+
// This query needs to be structured in this way to have the LIMIT
58+
// happen before the joining/sorting for performance reasons.
59+
// It needs to use sql_query because Diesel doesn't have a great way
60+
// to join on subselects right now :(
61+
let most_recently_downloaded = sql_query(
62+
"SELECT crates.* \
63+
FROM crates \
64+
JOIN ( \
65+
SELECT crate_downloads.crate_id, SUM(crate_downloads.downloads) \
66+
FROM crate_downloads \
67+
WHERE crate_downloads.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days') \
68+
GROUP BY crate_downloads.crate_id \
69+
ORDER BY SUM(crate_downloads.downloads) DESC NULLS LAST \
70+
LIMIT 10 \
71+
) cd ON crates.id = cd.crate_id \
72+
ORDER BY cd.sum DESC NULLS LAST",
73+
).load::<Crate>(&*conn)?;
6974

7075
let popular_keywords = keywords::table
7176
.order(keywords::crates_cnt.desc())

src/models/krate.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ pub struct CrateDownload {
3030
pub date: NaiveDate,
3131
}
3232

33-
#[derive(Debug, Clone, Queryable, Identifiable, Associations, AsChangeset)]
33+
#[derive(Debug, Clone, Queryable, Identifiable, Associations, AsChangeset, QueryableByName)]
34+
#[table_name = "crates"]
3435
pub struct Crate {
3536
pub id: i32,
3637
pub name: String,

0 commit comments

Comments
 (0)