Skip to content

Commit 6b2d526

Browse files
committed
Simplify and optimize Categories::toplevel
`EXPLAIN ANALYZE` output (tested assuming `ORDER BY crates_cnt DESC LIMIT 10 OFFSET 0`) Before: ``` (cost=7129.79..7129.82 rows=10 width=127) (actual time=1.184..1.187 rows=10 loops=1) ``` After: ``` (cost=5.88..5.88 rows=1 width=131) (actual time=0.175..0.177 rows=10 loops=1) ``` About half of the performance comes from removing the `COALESCE`. Since the subselect is including the `crates_cnt` from the toplevel category (which is why it doesn't have to add `c.crates_cnt`), so it can never return null. The second big win is changing from a subselect to a join. PG is usually quite good at figuring out when these cases are equivalent, but I suspect that the use of an aggregate function in the subselect means that it will actually have to subselect in a loop. Finally, we avoid using `LIKE`, since it's more expensive than we need, and can't be indexed. I've opted to use `split_part(slug)` in both the join and outer filter, so that both can be covered by a single index later. The "cheapest" way to do the outer filter is probably `strpos(slug, '::') = 0`, but the difference is so small that it doesn't matter. I explicitly did not include an index here, since the data set is small enough that it would never be used. If the number of categories grows beyond a few hundred, this query can also benefit from an index on `split_part(slug, '::', 1)`. The test coverage around this method was pretty light, so I've added some unit tests to give it a bit more coverage for correctness.
1 parent 9e0aef3 commit 6b2d526

File tree

3 files changed

+114
-12
lines changed

3 files changed

+114
-12
lines changed

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ opt-level = 2
88

99
[lib]
1010
name = "cargo_registry"
11-
test = false
1211
doctest = false
1312

1413
[[test]]

src/bin/delete-crate.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
#![deny(warnings)]
99

10-
#[macro_use]
1110
extern crate cargo_registry;
1211
extern crate postgres;
1312
extern crate time;

src/category.rs

Lines changed: 114 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,16 +157,13 @@ impl Category {
157157
// Collect all the top-level categories and sum up the crates_cnt of
158158
// the crates in all subcategories
159159
let stmt = conn.prepare(&format!(
160-
"SELECT c.id, c.category, c.slug, c.description, c.created_at, \
161-
COALESCE (( \
162-
SELECT sum(c2.crates_cnt)::int \
163-
FROM categories as c2 \
164-
WHERE c2.slug = c.slug \
165-
OR c2.slug LIKE c.slug || '::%' \
166-
), 0) as crates_cnt \
167-
FROM categories as c \
168-
WHERE c.category NOT LIKE '%::%' {} \
169-
LIMIT $1 OFFSET $2",
160+
"SELECT c.id, c.category, c.slug, c.description, c.created_at,
161+
sum(c2.crates_cnt)::int as crates_cnt
162+
FROM categories as c
163+
INNER JOIN categories c2 ON split_part(c2.slug, '::', 1) = c.slug
164+
WHERE split_part(c.slug, '::', 1) = c.slug
165+
GROUP BY c.id
166+
{} LIMIT $1 OFFSET $2",
170167
sort_sql
171168
))?;
172169

@@ -278,3 +275,110 @@ pub fn slugs(req: &mut Request) -> CargoResult<Response> {
278275
struct R { category_slugs: Vec<Slug> }
279276
Ok(req.json(&R { category_slugs: slugs }))
280277
}
278+
279+
#[cfg(test)]
280+
mod tests {
281+
use super::*;
282+
use pg::{Connection, TlsMode};
283+
use dotenv::dotenv;
284+
use std::env;
285+
286+
fn pg_connection() -> Connection {
287+
let _ = dotenv();
288+
let database_url = env::var("TEST_DATABASE_URL")
289+
.expect("TEST_DATABASE_URL must be set to run tests");
290+
let conn = Connection::connect(database_url, TlsMode::None).unwrap();
291+
// These tests deadlock if run concurrently
292+
conn.batch_execute("BEGIN; LOCK categories IN ACCESS EXCLUSIVE MODE").unwrap();
293+
conn
294+
}
295+
296+
#[test]
297+
fn category_toplevel_excludes_subcategories() {
298+
let conn = pg_connection();
299+
conn.batch_execute("INSERT INTO categories (category, slug) VALUES
300+
('Cat 2', 'cat2'), ('Cat 1', 'cat1'), ('Cat 1::sub', 'cat1::sub')
301+
").unwrap();
302+
303+
let categories = Category::toplevel(&conn, "", 10, 0).unwrap()
304+
.into_iter().map(|c| c.category).collect::<Vec<_>>();
305+
let expected = vec!["Cat 1".to_string(), "Cat 2".to_string()];
306+
assert_eq!(expected, categories);
307+
}
308+
309+
#[test]
310+
fn category_toplevel_orders_by_crates_cnt_when_sort_given() {
311+
let conn = pg_connection();
312+
conn.batch_execute("INSERT INTO categories (category, slug, crates_cnt) VALUES
313+
('Cat 1', 'cat1', 0), ('Cat 2', 'cat2', 2), ('Cat 3', 'cat3', 1)
314+
").unwrap();
315+
316+
let categories = Category::toplevel(&conn, "crates", 10, 0).unwrap()
317+
.into_iter().map(|c| c.category).collect::<Vec<_>>();
318+
let expected = vec!["Cat 2".to_string(), "Cat 3".to_string(), "Cat 1".to_string()];
319+
assert_eq!(expected, categories);
320+
}
321+
322+
#[test]
323+
fn category_toplevel_applies_limit_and_offset() {
324+
let conn = pg_connection();
325+
conn.batch_execute("INSERT INTO categories (category, slug) VALUES
326+
('Cat 1', 'cat1'), ('Cat 2', 'cat2')
327+
").unwrap();
328+
329+
let categories = Category::toplevel(&conn, "", 1, 0).unwrap()
330+
.into_iter().map(|c| c.category).collect::<Vec<_>>();
331+
let expected = vec!["Cat 1".to_string()];
332+
assert_eq!(expected, categories);
333+
334+
let categories = Category::toplevel(&conn, "", 1, 1).unwrap()
335+
.into_iter().map(|c| c.category).collect::<Vec<_>>();
336+
let expected = vec!["Cat 2".to_string()];
337+
assert_eq!(expected, categories);
338+
}
339+
340+
#[test]
341+
fn category_toplevel_includes_subcategories_in_crate_cnt() {
342+
let conn = pg_connection();
343+
conn.batch_execute("INSERT INTO categories (category, slug, crates_cnt) VALUES
344+
('Cat 1', 'cat1', 1), ('Cat 1::sub', 'cat1::sub', 2),
345+
('Cat 2', 'cat2', 3), ('Cat 2::Sub 1', 'cat2::sub1', 4),
346+
('Cat 2::Sub 2', 'cat2::sub2', 5), ('Cat 3', 'cat3', 6)
347+
").unwrap();
348+
349+
let categories = Category::toplevel(&conn, "crates", 10, 0).unwrap()
350+
.into_iter().map(|c| (c.category, c.crates_cnt)).collect::<Vec<_>>();
351+
let expected = vec![
352+
("Cat 2".to_string(), 12),
353+
("Cat 3".to_string(), 6),
354+
("Cat 1".to_string(), 3),
355+
];
356+
assert_eq!(expected, categories);
357+
}
358+
359+
#[test]
360+
fn category_toplevel_applies_limit_and_offset_after_grouping() {
361+
let conn = pg_connection();
362+
conn.batch_execute("INSERT INTO categories (category, slug, crates_cnt) VALUES
363+
('Cat 1', 'cat1', 1), ('Cat 1::sub', 'cat1::sub', 2),
364+
('Cat 2', 'cat2', 3), ('Cat 2::Sub 1', 'cat2::sub1', 4),
365+
('Cat 2::Sub 2', 'cat2::sub2', 5), ('Cat 3', 'cat3', 6)
366+
").unwrap();
367+
368+
let categories = Category::toplevel(&conn, "crates", 2, 0).unwrap()
369+
.into_iter().map(|c| (c.category, c.crates_cnt)).collect::<Vec<_>>();
370+
let expected = vec![
371+
("Cat 2".to_string(), 12),
372+
("Cat 3".to_string(), 6),
373+
];
374+
assert_eq!(expected, categories);
375+
376+
let categories = Category::toplevel(&conn, "crates", 2, 1).unwrap()
377+
.into_iter().map(|c| (c.category, c.crates_cnt)).collect::<Vec<_>>();
378+
let expected = vec![
379+
("Cat 3".to_string(), 6),
380+
("Cat 1".to_string(), 3),
381+
];
382+
assert_eq!(expected, categories);
383+
}
384+
}

0 commit comments

Comments
 (0)