Skip to content

Commit 68f6824

Browse files
committed
Require all traffic to provide a User-Agent header.
We want to be able to differentiate crawlers from each other, so I've nudged them towards using a unique user agent (we probably won't ever block generic UAs, since folks sometimes do use curl/wget from the command line). Additionally, I've had a lot of cases lately where a crawler has been outside of what we allow but wasn't causing a service impact. If I could contact those people without having to block their traffic, I would. So I've also worded the message to try to nudge folks towards including contact info, which most commercial bots already do.
1 parent 7fc9d43 commit 68f6824

File tree

6 files changed

+74
-4
lines changed

6 files changed

+74
-4
lines changed

src/middleware/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod ember_index_rewrite;
2020
mod ensure_well_formed_500;
2121
mod head;
2222
mod log_request;
23+
mod require_user_agent;
2324
mod security_headers;
2425
mod static_or_continue;
2526

@@ -85,6 +86,7 @@ pub fn build_middleware(app: Arc<App>, endpoints: R404) -> MiddlewareBuilder {
8586
let ips = ip_list.split(',').map(String::from).collect();
8687
m.around(blacklist_ips::BlockIps::new(ips));
8788
}
89+
m.around(require_user_agent::RequireUserAgent::default());
8890

8991
if env != Env::Test {
9092
m.around(log_request::LogRequests::default());
src/middleware/no_user_agent_message.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
We require that all requests include a `User-Agent` header. To allow us to determine the impact your bot has on our service, we ask that your user agent actually identify your bot, and not just report the HTTP client library you're using. Including contact information will also reduce the chance that we will need to take action against your bot.
2+
3+
Bad:
4+
User-Agent: reqwest/0.9.1
5+
6+
Better:
7+
User-Agent: my_crawler
8+
9+
Best:
10+
User-Agent: my_crawler (my_crawler.com/info)
11+
User-Agent: my_crawler (help@my_crawler.com)
12+
13+
If you believe you've received this message in error, please email help@crates.io and include the request id {}.

src/middleware/require_user_agent.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//! Middleware that blocks requests with no user-agent header
2+
3+
use super::prelude::*;
4+
5+
use std::collections::HashMap;
6+
use std::io::Cursor;
7+
use util::request_header;
8+
9+
// Can't derive debug because of Handler.
10+
#[allow(missing_debug_implementations)]
11+
#[derive(Default)]
12+
pub struct RequireUserAgent {
13+
handler: Option<Box<dyn Handler>>,
14+
}
15+
16+
impl AroundMiddleware for RequireUserAgent {
17+
fn with_handler(&mut self, handler: Box<dyn Handler>) {
18+
self.handler = Some(handler);
19+
}
20+
}
21+
22+
impl Handler for RequireUserAgent {
23+
fn call(&self, req: &mut dyn Request) -> Result<Response, Box<dyn Error + Send>> {
24+
let has_user_agent = request_header(req, "User-Agent") != "";
25+
if !has_user_agent {
26+
let body = format!(
27+
include_str!("no_user_agent_message.txt"),
28+
request_header(req, "X-Request-Id"),
29+
);
30+
let mut headers = HashMap::new();
31+
headers.insert("Content-Length".to_string(), vec![body.len().to_string()]);
32+
Ok(Response {
33+
status: (403, "Forbidden"),
34+
headers,
35+
body: Box::new(Cursor::new(body.into_bytes())),
36+
})
37+
} else {
38+
self.handler.as_ref().unwrap().call(req)
39+
}
40+
}
41+
}

src/tests/all.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,9 @@ fn env(var: &str) -> String {
187187
}
188188

189189
fn req(method: conduit::Method, path: &str) -> MockRequest {
190-
MockRequest::new(method, path)
190+
let mut request = MockRequest::new(method, path);
191+
request.header("User-Agent", "conduit-test");
192+
request
191193
}
192194

193195
fn ok_resp(r: &conduit::Response) -> bool {

src/tests/server.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
use conduit::Method;
2+
3+
use {app, req};
4+
5+
/// A request whose `User-Agent` header is set to the empty string must
/// produce a bad response (as judged by `bad_resp!`) from the middleware stack.
#[test]
fn user_agent_is_required() {
    let (_b, _app, middle) = app();

    // Start from the shared request helper, then set the header to empty.
    let mut request = req(Method::Get, "/api/v1/crates");
    request.header("User-Agent", "");

    bad_resp!(middle.call(&mut request));
}

src/tests/util.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ pub struct MockAnonymousUser {
158158

159159
impl RequestHelper for MockAnonymousUser {
160160
fn request_builder(&self, method: Method, path: &str) -> MockRequest {
161-
MockRequest::new(method, path)
161+
::req(method, path)
162162
}
163163

164164
fn app(&self) -> &TestApp {
@@ -177,7 +177,7 @@ pub struct MockCookieUser {
177177

178178
impl RequestHelper for MockCookieUser {
179179
fn request_builder(&self, method: Method, path: &str) -> MockRequest {
180-
let mut request = MockRequest::new(method, path);
180+
let mut request = ::req(method, path);
181181
request.mut_extensions().insert(self.user.clone());
182182
request
183183
.mut_extensions()
@@ -218,7 +218,7 @@ pub struct MockTokenUser {
218218

219219
impl RequestHelper for MockTokenUser {
220220
fn request_builder(&self, method: Method, path: &str) -> MockRequest {
221-
let mut request = MockRequest::new(method, path);
221+
let mut request = ::req(method, path);
222222
request.header("Authorization", &self.token.token);
223223
request
224224
}

0 commit comments

Comments
 (0)