Skip to content

Commit 4ba9fdb

Browse files
bors-voyager[bot]sgrifjtgeibel
committed
Merge #1534
1534: Require all traffic to provide a `User-Agent` header. r=sgrif a=sgrif We want to be able to actually differentiate crawlers from each other, so I've nudged them towards actually using a unique user agent (we probably won't ever actually block generic UAs since folks sometimes do actually use curl/wget from the command line). Additionally, I've had a lot of cases lately where a crawler has been outside of what we allow, but wasn't actually causing a service impact. If I could contact those people without having to block their traffic, I would. So I've also worded the message to try and nudge folks towards including contact info, which most commercial bots already do. Co-authored-by: Sean Griffin <[email protected]> Co-authored-by: Justin Geibel <[email protected]>
2 parents 04db8a7 + 5a27609 commit 4ba9fdb

File tree

6 files changed

+76
-4
lines changed

6 files changed

+76
-4
lines changed

src/middleware/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod ember_index_rewrite;
2020
mod ensure_well_formed_500;
2121
mod head;
2222
mod log_request;
23+
mod require_user_agent;
2324
mod security_headers;
2425
mod static_or_continue;
2526

@@ -85,6 +86,7 @@ pub fn build_middleware(app: Arc<App>, endpoints: R404) -> MiddlewareBuilder {
8586
let ips = ip_list.split(',').map(String::from).collect();
8687
m.around(block_ips::BlockIps::new(ips));
8788
}
89+
m.around(require_user_agent::RequireUserAgent::default());
8890

8991
if env != Env::Test {
9092
m.around(log_request::LogRequests::default());
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
We require that all requests include a `User-Agent` header. To allow us to determine the impact your bot has on our service, we ask that your user agent actually identify your bot, and not just report the HTTP client library you're using. Including contact information will also reduce the chance that we will need to take action against your bot.
2+
3+
Bad:
4+
User-Agent: reqwest/0.9.1
5+
6+
Better:
7+
User-Agent: my_crawler
8+
9+
Best:
10+
User-Agent: my_crawler (my_crawler.com/info)
11+
User-Agent: my_crawler (help@my_crawler.com)
12+
13+
If you believe you've received this message in error, please email help@crates.io and include the request id {}.

src/middleware/require_user_agent.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//! Middleware that blocks requests with no user-agent header
2+
3+
use super::prelude::*;
4+
5+
use std::collections::HashMap;
6+
use std::io::Cursor;
7+
use util::request_header;
8+
9+
// Can't derive debug because of Handler.
10+
#[allow(missing_debug_implementations)]
11+
#[derive(Default)]
12+
pub struct RequireUserAgent {
13+
handler: Option<Box<dyn Handler>>,
14+
}
15+
16+
impl AroundMiddleware for RequireUserAgent {
17+
fn with_handler(&mut self, handler: Box<dyn Handler>) {
18+
self.handler = Some(handler);
19+
}
20+
}
21+
22+
impl Handler for RequireUserAgent {
23+
fn call(&self, req: &mut dyn Request) -> Result<Response, Box<dyn Error + Send>> {
24+
let has_user_agent = request_header(req, "User-Agent") != "";
25+
if !has_user_agent {
26+
let body = format!(
27+
include_str!("no_user_agent_message.txt"),
28+
request_header(req, "X-Request-Id"),
29+
);
30+
let mut headers = HashMap::new();
31+
headers.insert("Content-Length".to_string(), vec![body.len().to_string()]);
32+
Ok(Response {
33+
status: (403, "Forbidden"),
34+
headers,
35+
body: Box::new(Cursor::new(body.into_bytes())),
36+
})
37+
} else {
38+
self.handler.as_ref().unwrap().call(req)
39+
}
40+
}
41+
}

src/tests/all.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ mod krate;
9090
mod owners;
9191
mod record;
9292
mod schema_details;
93+
mod server;
9394
mod team;
9495
mod token;
9596
mod user;
@@ -187,7 +188,9 @@ fn env(var: &str) -> String {
187188
}
188189

189190
fn req(method: conduit::Method, path: &str) -> MockRequest {
190-
MockRequest::new(method, path)
191+
let mut request = MockRequest::new(method, path);
192+
request.header("User-Agent", "conduit-test");
193+
request
191194
}
192195

193196
fn ok_resp(r: &conduit::Response) -> bool {

src/tests/server.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
use conduit::{Handler, Method};
2+
3+
use {app, req};
4+
5+
/// A request whose `User-Agent` header is empty must be answered with 403.
#[test]
fn user_agent_is_required() {
    let (_b, _app, middle) = app();

    // The shared `req` helper sets a `User-Agent` header, so overwrite it
    // with an empty value to exercise the rejection path.
    let mut request = req(Method::Get, "/api/v1/crates");
    request.header("User-Agent", "");

    let response = t!(middle.call(&mut request));
    let (status_code, _reason) = response.status;
    assert_eq!(status_code, 403);
}

src/tests/util.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ pub struct MockAnonymousUser {
158158

159159
impl RequestHelper for MockAnonymousUser {
160160
fn request_builder(&self, method: Method, path: &str) -> MockRequest {
161-
MockRequest::new(method, path)
161+
::req(method, path)
162162
}
163163

164164
fn app(&self) -> &TestApp {
@@ -177,7 +177,7 @@ pub struct MockCookieUser {
177177

178178
impl RequestHelper for MockCookieUser {
179179
fn request_builder(&self, method: Method, path: &str) -> MockRequest {
180-
let mut request = MockRequest::new(method, path);
180+
let mut request = ::req(method, path);
181181
request.mut_extensions().insert(self.user.clone());
182182
request
183183
.mut_extensions()
@@ -218,7 +218,7 @@ pub struct MockTokenUser {
218218

219219
impl RequestHelper for MockTokenUser {
220220
fn request_builder(&self, method: Method, path: &str) -> MockRequest {
221-
let mut request = MockRequest::new(method, path);
221+
let mut request = ::req(method, path);
222222
request.header("Authorization", &self.token.token);
223223
request
224224
}

0 commit comments

Comments
 (0)