Skip to content

Commit 5cfdbca

Browse files
committed
add basic support for seek-based pagination in the crates endpoint
1 parent 5b18e0c commit 5cfdbca

File tree

3 files changed

+301
-19
lines changed

3 files changed

+301
-19
lines changed

src/controllers/helpers/pagination.rs

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use diesel::query_builder::*;
1010
use diesel::query_dsl::LoadQuery;
1111
use diesel::sql_types::BigInt;
1212
use indexmap::IndexMap;
13+
use serde::{Deserialize, Serialize};
1314
use std::sync::Arc;
1415

1516
const MAX_PAGE_BEFORE_SUSPECTED_BOT: u32 = 10;
@@ -19,6 +20,7 @@ const MAX_PER_PAGE: u32 = 100;
1920
#[derive(Debug, Clone, PartialEq, Eq)]
2021
pub(crate) enum Page {
2122
Numeric(u32),
23+
Seek(RawSeekPayload),
2224
Unspecified,
2325
}
2426

@@ -32,6 +34,7 @@ impl PaginationOptions {
3234
pub(crate) fn builder() -> PaginationOptionsBuilder {
3335
PaginationOptionsBuilder {
3436
limit_page_numbers: None,
37+
enable_seek: false,
3538
}
3639
}
3740

@@ -46,6 +49,7 @@ impl PaginationOptions {
4649

4750
pub(crate) struct PaginationOptionsBuilder {
4851
limit_page_numbers: Option<Arc<App>>,
52+
enable_seek: bool,
4953
}
5054

5155
impl PaginationOptionsBuilder {
@@ -54,9 +58,21 @@ impl PaginationOptionsBuilder {
5458
self
5559
}
5660

61+
pub(crate) fn enable_seek(mut self, enable: bool) -> Self {
62+
self.enable_seek = enable;
63+
self
64+
}
65+
5766
pub(crate) fn gather(self, req: &mut dyn RequestExt) -> AppResult<PaginationOptions> {
5867
let params = req.query();
5968
let page_param = params.get("page");
69+
let seek_param = params.get("seek");
70+
71+
if seek_param.is_some() && page_param.is_some() {
72+
return Err(bad_request(
73+
"providing both ?page= and ?seek= is unsupported",
74+
));
75+
}
6076

6177
let page = if let Some(s) = page_param {
6278
let numeric_page = s.parse().map_err(|e| bad_request(&e))?;
@@ -87,6 +103,12 @@ impl PaginationOptionsBuilder {
87103
}
88104

89105
Page::Numeric(numeric_page)
106+
} else if let Some(s) = seek_param {
107+
if self.enable_seek {
108+
Page::Seek(RawSeekPayload(s.clone()))
109+
} else {
110+
return Err(bad_request("?seek= is not supported for this request"));
111+
}
90112
} else {
91113
Page::Unspecified
92114
};
@@ -139,14 +161,15 @@ impl<T> Paginated<T> {
139161
match self.options.page {
140162
Page::Numeric(n) => opts.insert("page".into(), (n + 1).to_string()),
141163
Page::Unspecified => opts.insert("page".into(), 2.to_string()),
164+
Page::Seek(_) => return None,
142165
};
143166
Some(opts)
144167
}
145168

146169
pub(crate) fn prev_page_params(&self) -> Option<IndexMap<String, String>> {
147170
let mut opts = IndexMap::new();
148171
match self.options.page {
149-
Page::Numeric(1) | Page::Unspecified => return None,
172+
Page::Numeric(1) | Page::Unspecified | Page::Seek(_) => return None,
150173
Page::Numeric(n) => opts.insert("page".into(), (n - 1).to_string()),
151174
};
152175
Some(opts)
@@ -214,6 +237,35 @@ where
214237
}
215238
}
216239

240+
#[derive(Debug, Clone, PartialEq, Eq)]
241+
pub(crate) struct RawSeekPayload(String);
242+
243+
impl RawSeekPayload {
244+
pub(crate) fn decode<D: for<'a> Deserialize<'a>>(&self) -> AppResult<D> {
245+
decode_seek(&self.0)
246+
}
247+
}
248+
249+
/// Encode a payload to be used as a seek key.
250+
///
251+
/// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no
252+
/// technical measure to prevent API consumers for manually creating or modifying them, but
253+
/// hopefully the base64 will be enough to convey that doing it is unsupported.
254+
pub(crate) fn encode_seek<S: Serialize>(params: S) -> AppResult<String> {
255+
Ok(base64::encode_config(
256+
&serde_json::to_vec(&params)?,
257+
base64::URL_SAFE_NO_PAD,
258+
))
259+
}
260+
261+
/// Decode a list of params previously encoded with [`encode_seek`].
262+
pub(crate) fn decode_seek<D: for<'a> Deserialize<'a>>(seek: &str) -> AppResult<D> {
263+
Ok(serde_json::from_slice(&base64::decode_config(
264+
seek,
265+
base64::URL_SAFE_NO_PAD,
266+
)?)?)
267+
}
268+
217269
#[cfg(test)]
218270
mod tests {
219271
use super::*;
@@ -259,6 +311,57 @@ mod tests {
259311
assert_eq!(5, pagination.per_page);
260312
}
261313

314+
#[test]
315+
fn seek_param_parsing() {
316+
assert_pagination_error(
317+
PaginationOptions::builder(),
318+
"seek=OTg",
319+
"?seek= is not supported for this request",
320+
);
321+
322+
let pagination = PaginationOptions::builder()
323+
.enable_seek(true)
324+
.gather(&mut mock("seek=OTg"))
325+
.unwrap();
326+
327+
if let Page::Seek(raw) = pagination.page {
328+
assert_eq!(98, raw.decode::<i32>().unwrap());
329+
} else {
330+
panic!(
331+
"did not parse a seek page, parsed {:?} instead",
332+
pagination.page
333+
);
334+
}
335+
}
336+
337+
#[test]
338+
fn both_page_and_seek() {
339+
assert_pagination_error(
340+
PaginationOptions::builder(),
341+
"page=1&seek=OTg",
342+
"providing both ?page= and ?seek= is unsupported",
343+
);
344+
assert_pagination_error(
345+
PaginationOptions::builder().enable_seek(true),
346+
"page=1&seek=OTg",
347+
"providing both ?page= and ?seek= is unsupported",
348+
);
349+
}
350+
351+
#[test]
352+
fn test_seek_encode_and_decode() {
353+
// Encoding produces the results we expect
354+
assert_eq!("OTg", encode_seek(98).unwrap());
355+
assert_eq!("WyJmb28iLDQyXQ", encode_seek(("foo", 42)).unwrap());
356+
357+
// Encoded values can be then decoded.
358+
assert_eq!(98, decode_seek::<i32>(&encode_seek(98).unwrap()).unwrap(),);
359+
assert_eq!(
360+
("foo".into(), 42),
361+
decode_seek::<(String, i32)>(&encode_seek(("foo", 42)).unwrap()).unwrap(),
362+
);
363+
}
364+
262365
fn mock(query: &str) -> MockRequest {
263366
let mut req = MockRequest::new(http::Method::GET, "");
264367
req.with_query(query);

src/controllers/krate/search.rs

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use diesel::dsl::*;
44
use diesel_full_text_search::*;
5+
use indexmap::IndexMap;
56

67
use crate::controllers::cargo_prelude::*;
78
use crate::controllers::helpers::Paginate;
@@ -12,7 +13,7 @@ use crate::schema::*;
1213
use crate::util::errors::{bad_request, ChainError};
1314
use crate::views::EncodableCrate;
1415

15-
use crate::controllers::helpers::pagination::{Paginated, PaginationOptions};
16+
use crate::controllers::helpers::pagination::{Page, Paginated, PaginationOptions};
1617
use crate::models::krate::{canon_crate_name, ALL_COLUMNS};
1718

1819
/// Handles the `GET /crates` route.
@@ -56,7 +57,13 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
5657
.select(selection)
5758
.into_boxed();
5859

60+
let mut supports_seek = true;
61+
5962
if let Some(q_string) = params.get("q") {
63+
// Searching with a query string always puts the exact match at the start of the results,
64+
// so we can't support seek-based pagination with it.
65+
supports_seek = false;
66+
6067
if !q_string.is_empty() {
6168
let sort = params.get("sort").map(|s| &**s).unwrap_or("relevance");
6269

@@ -84,6 +91,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
8491
}
8592

8693
if let Some(cat) = params.get("category") {
94+
// Calculating the total number of results with filters is not supported yet.
95+
supports_seek = false;
96+
8797
query = query.filter(
8898
crates::id.eq_any(
8999
crates_categories::table
@@ -102,6 +112,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
102112
use diesel::sql_types::Array;
103113
sql_function!(#[aggregate] fn array_agg<T>(x: T) -> Array<T>);
104114

115+
// Calculating the total number of results with filters is not supported yet.
116+
supports_seek = false;
117+
105118
let names: Vec<_> = kws
106119
.split_whitespace()
107120
.map(|name| name.to_lowercase())
@@ -120,6 +133,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
120133
),
121134
);
122135
} else if let Some(kw) = params.get("keyword") {
136+
// Calculating the total number of results with filters is not supported yet.
137+
supports_seek = false;
138+
123139
query = query.filter(
124140
crates::id.eq_any(
125141
crates_keywords::table
@@ -129,6 +145,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
129145
),
130146
);
131147
} else if let Some(letter) = params.get("letter") {
148+
// Calculating the total number of results with filters is not supported yet.
149+
supports_seek = false;
150+
132151
let pattern = format!(
133152
"{}%",
134153
letter
@@ -140,6 +159,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
140159
);
141160
query = query.filter(canon_crate_name(crates::name).like(pattern));
142161
} else if let Some(user_id) = params.get("user_id").and_then(|s| s.parse::<i32>().ok()) {
162+
// Calculating the total number of results with filters is not supported yet.
163+
supports_seek = false;
164+
143165
query = query.filter(
144166
crates::id.eq_any(
145167
CrateOwner::by_owner_kind(OwnerKind::User)
@@ -148,6 +170,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
148170
),
149171
);
150172
} else if let Some(team_id) = params.get("team_id").and_then(|s| s.parse::<i32>().ok()) {
173+
// Calculating the total number of results with filters is not supported yet.
174+
supports_seek = false;
175+
151176
query = query.filter(
152177
crates::id.eq_any(
153178
CrateOwner::by_owner_kind(OwnerKind::Team)
@@ -156,6 +181,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
156181
),
157182
);
158183
} else if params.get("following").is_some() {
184+
// Calculating the total number of results with filters is not supported yet.
185+
supports_seek = false;
186+
159187
let user_id = req.authenticate()?.user_id();
160188
query = query.filter(
161189
crates::id.eq_any(
@@ -165,6 +193,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
165193
),
166194
);
167195
} else if params.get("ids[]").is_some() {
196+
// Calculating the total number of results with filters is not supported yet.
197+
supports_seek = false;
198+
168199
let query_bytes = req.query_string().unwrap_or("").as_bytes();
169200
let ids: Vec<_> = url::form_urlencoded::parse(query_bytes)
170201
.filter(|(key, _)| key == "ids[]")
@@ -175,6 +206,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
175206
}
176207

177208
if !include_yanked {
209+
// Calculating the total number of results with filters is not supported yet.
210+
supports_seek = false;
211+
178212
query = query.filter(exists(
179213
versions::table
180214
.filter(versions::crate_id.eq(crates::id))
@@ -183,28 +217,89 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult {
183217
}
184218

185219
if sort == Some("downloads") {
220+
// Custom sorting is not supported yet with seek.
221+
supports_seek = false;
222+
186223
query = query.then_order_by(crates::downloads.desc())
187224
} else if sort == Some("recent-downloads") {
225+
// Custom sorting is not supported yet with seek.
226+
supports_seek = false;
227+
188228
query = query.then_order_by(recent_crate_downloads::downloads.desc().nulls_last())
189229
} else if sort == Some("recent-updates") {
230+
// Custom sorting is not supported yet with seek.
231+
supports_seek = false;
232+
190233
query = query.order(crates::updated_at.desc());
191234
} else if sort == Some("new") {
235+
// Custom sorting is not supported yet with seek.
236+
supports_seek = false;
237+
192238
query = query.order(crates::created_at.desc());
193239
} else {
194240
query = query.then_order_by(crates::name.asc())
195241
}
196242

197-
let query = query.pages_pagination(
198-
PaginationOptions::builder()
199-
.limit_page_numbers(req.app().clone())
200-
.gather(req)?,
201-
);
243+
let pagination: PaginationOptions = PaginationOptions::builder()
244+
.limit_page_numbers(req.app().clone())
245+
.enable_seek(supports_seek)
246+
.gather(req)?;
202247
let conn = req.db_read_only()?;
203-
let data: Paginated<(Crate, bool, Option<i64>)> = query.load(&*conn)?;
204-
let total = data.total();
205248

206-
let next_page = data.next_page_params().map(|p| req.query_with_params(p));
207-
let prev_page = data.prev_page_params().map(|p| req.query_with_params(p));
249+
let (explicit_page, seek) = match pagination.page.clone() {
250+
Page::Numeric(_) => (true, None),
251+
Page::Seek(s) => (false, Some(s.decode::<i32>()?)),
252+
Page::Unspecified => (false, None),
253+
};
254+
255+
// To avoid breaking existing users, seek-based pagination is only used if an explicit page has
256+
// not been provided. This way clients relying on meta.next_page will use the faster seek-based
257+
// paginations, while client hardcoding pages handling will use the slower offset-based code.
258+
let (total, next_page, prev_page, data, conn) = if supports_seek && !explicit_page {
259+
// Equivalent of:
260+
// `WHERE name > (SELECT name FROM crates WHERE id = $1) LIMIT $2`
261+
query = query.limit(pagination.per_page as i64);
262+
if let Some(seek) = seek {
263+
let crate_name: String = crates::table
264+
.find(seek)
265+
.select(crates::name)
266+
.get_result(&*conn)?;
267+
query = query.filter(crates::name.gt(crate_name));
268+
}
269+
270+
// This does a full index-only scan over the crates table to gather how many crates were
271+
// published. Unfortunately on PostgreSQL counting the rows in a table requires scanning
272+
// the table, and the `total` field is part of the stable registries API.
273+
//
274+
// If this becomes a problem in the future the crates count could be denormalized, at least
275+
// for the filterless happy path.
276+
let total: i64 = crates::table.count().get_result(&*conn)?;
277+
278+
let results: Vec<(Crate, bool, Option<i64>)> = query.load(&*conn)?;
279+
280+
let next_page = if let Some(last) = results.last() {
281+
let mut params = IndexMap::new();
282+
params.insert(
283+
"seek".into(),
284+
crate::controllers::helpers::pagination::encode_seek(last.0.id)?,
285+
);
286+
Some(req.query_with_params(params))
287+
} else {
288+
None
289+
};
290+
291+
(total, next_page, None, results, conn)
292+
} else {
293+
let query = query.pages_pagination(pagination);
294+
let data: Paginated<(Crate, bool, Option<i64>)> = query.load(&*conn)?;
295+
(
296+
data.total(),
297+
data.next_page_params().map(|p| req.query_with_params(p)),
298+
data.prev_page_params().map(|p| req.query_with_params(p)),
299+
data.into_iter().collect::<Vec<_>>(),
300+
conn,
301+
)
302+
};
208303

209304
let perfect_matches = data.iter().map(|&(_, b, _)| b).collect::<Vec<_>>();
210305
let recent_downloads = data

0 commit comments

Comments
 (0)