Skip to content

Add more histograms #3775

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ impl App {
pub fn new(config: config::Server, http_client: Option<Client>) -> App {
use oauth2::{AuthUrl, ClientId, ClientSecret, TokenUrl};

let instance_metrics =
InstanceMetrics::new().expect("could not initialize instance metrics");

let github = GitHubClient::new(http_client.clone(), config.gh_base_url.clone());

let github_oauth = BasicClient::new(
Expand Down Expand Up @@ -116,7 +119,14 @@ impl App {
.connection_customizer(Box::new(primary_db_connection_config))
.thread_pool(thread_pool.clone());

DieselPool::new(&config.db.primary.url, primary_db_config).unwrap()
DieselPool::new(
&config.db.primary.url,
primary_db_config,
instance_metrics
.database_time_to_obtain_connection
.with_label_values(&["primary"]),
)
.unwrap()
};

let replica_database = if let Some(url) = config.db.replica.as_ref().map(|c| &c.url) {
Expand All @@ -135,7 +145,16 @@ impl App {
.connection_customizer(Box::new(replica_db_connection_config))
.thread_pool(thread_pool);

Some(DieselPool::new(url, replica_db_config).unwrap())
Some(
DieselPool::new(
url,
replica_db_config,
instance_metrics
.database_time_to_obtain_connection
.with_label_values(&["follower"]),
)
.unwrap(),
)
}
} else {
None
Expand All @@ -150,8 +169,7 @@ impl App {
downloads_counter: DownloadsCounter::new(),
emails: Emails::from_environment(),
service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"),
instance_metrics: InstanceMetrics::new()
.expect("could not initialize instance metrics"),
instance_metrics,
http_client,
}
}
Expand Down
13 changes: 0 additions & 13 deletions src/controllers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ mod prelude {

pub use crate::db::RequestTransaction;
pub use crate::middleware::app::RequestApp;
pub use crate::middleware::log_request::TimingRecorder;
pub use crate::util::errors::{cargo_err, AppError, AppResult, ChainError}; // TODO: Remove cargo_err from here
pub use crate::util::{AppResponse, EndpointResult};

Expand All @@ -39,7 +38,6 @@ mod prelude {
fn query_with_params(&self, params: IndexMap<String, String>) -> String;

fn log_metadata<V: std::fmt::Display>(&mut self, key: &'static str, value: V);
fn timing_recorder(&mut self) -> TimingRecorder;
}

impl<'a> RequestUtils for dyn RequestExt + 'a {
Expand Down Expand Up @@ -80,17 +78,6 @@ mod prelude {
fn log_metadata<V: std::fmt::Display>(&mut self, key: &'static str, value: V) {
crate::middleware::log_request::add_custom_metadata(self, key, value);
}

fn timing_recorder(&mut self) -> TimingRecorder {
if let Some(recorder) = self.extensions().find::<TimingRecorder>() {
recorder.clone()
} else {
let recorder = TimingRecorder::new();
self.mut_extensions()
.insert::<TimingRecorder>(recorder.clone());
recorder
}
}
}
}

Expand Down
22 changes: 12 additions & 10 deletions src/controllers/version/downloads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,28 @@ use chrono::{Duration, NaiveDate, Utc};
/// This returns a URL to the location where the crate is stored.
pub fn download(req: &mut dyn RequestExt) -> EndpointResult {
let app = req.app().clone();
let recorder = req.timing_recorder();

let mut crate_name = req.params()["crate_id"].clone();
let version = req.params()["version"].as_str();

let mut log_metadata = None;
match recorder.record("get_conn", || req.db_conn()) {
match req.db_conn() {
Ok(conn) => {
use self::versions::dsl::*;

// Returns the crate name as stored in the database, or an error if we could
// not load the version ID from the database.
let (version_id, canonical_crate_name) = recorder.record("get_version", || {
versions
.inner_join(crates::table)
.select((id, crates::name))
.filter(Crate::with_name(&crate_name))
.filter(num.eq(version))
.first::<(i32, String)>(&*conn)
})?;
let (version_id, canonical_crate_name) = app
.instance_metrics
.downloads_select_query_execution_time
.observe_closure_duration(|| {
versions
.inner_join(crates::table)
.select((id, crates::name))
.filter(Crate::with_name(&crate_name))
.filter(num.eq(version))
.first::<(i32, String)>(&*conn)
})?;

if canonical_crate_name != crate_name {
app.instance_metrics
Expand Down
23 changes: 17 additions & 6 deletions src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use conduit::RequestExt;
use diesel::prelude::*;
use diesel::r2d2::{self, ConnectionManager, CustomizeConnection};
use parking_lot::{ReentrantMutex, ReentrantMutexGuard};
use prometheus::Histogram;
use std::sync::Arc;
use std::{ops::Deref, time::Duration};
use url::Url;
Expand All @@ -10,14 +11,18 @@ use crate::middleware::app::RequestApp;

#[derive(Clone)]
pub enum DieselPool {
Pool(r2d2::Pool<ConnectionManager<PgConnection>>),
Pool {
pool: r2d2::Pool<ConnectionManager<PgConnection>>,
time_to_obtain_connection_metric: Histogram,
},
Test(Arc<ReentrantMutex<PgConnection>>),
}

impl DieselPool {
pub(crate) fn new(
url: &str,
config: r2d2::Builder<ConnectionManager<PgConnection>>,
time_to_obtain_connection_metric: Histogram,
) -> Result<DieselPool, PoolError> {
let manager = ConnectionManager::new(connection_url(url));

Expand All @@ -32,7 +37,10 @@ impl DieselPool {
// serving errors for the first connections until the pool is initialized) and if we can't
// establish any connection continue booting up the application. The database pool will
// automatically be marked as unhealthy and the rest of the application will adapt.
let pool = DieselPool::Pool(config.build_unchecked(manager));
let pool = DieselPool::Pool {
pool: config.build_unchecked(manager),
time_to_obtain_connection_metric,
};
match pool.wait_until_healthy(Duration::from_secs(5)) {
Ok(()) => {}
Err(PoolError::UnhealthyPool) => {}
Expand All @@ -52,22 +60,25 @@ impl DieselPool {

pub fn get(&self) -> Result<DieselPooledConn<'_>, PoolError> {
match self {
DieselPool::Pool(pool) => {
DieselPool::Pool {
pool,
time_to_obtain_connection_metric,
} => time_to_obtain_connection_metric.observe_closure_duration(|| {
if let Some(conn) = pool.try_get() {
Ok(DieselPooledConn::Pool(conn))
} else if !self.is_healthy() {
Err(PoolError::UnhealthyPool)
} else {
Ok(DieselPooledConn::Pool(pool.get().map_err(PoolError::R2D2)?))
}
}
}),
DieselPool::Test(conn) => Ok(DieselPooledConn::Test(conn.lock())),
}
}

pub fn state(&self) -> PoolState {
match self {
DieselPool::Pool(pool) => {
DieselPool::Pool { pool, .. } => {
let state = pool.state();
PoolState {
connections: state.connections,
Expand All @@ -83,7 +94,7 @@ impl DieselPool {

pub fn wait_until_healthy(&self, timeout: Duration) -> Result<(), PoolError> {
match self {
DieselPool::Pool(pool) => match pool.get_timeout(timeout) {
DieselPool::Pool { pool, .. } => match pool.get_timeout(timeout) {
Ok(_) => Ok(()),
Err(_) if !self.is_healthy() => Err(PoolError::UnhealthyPool),
Err(err) => Err(PoolError::R2D2(err)),
Expand Down
6 changes: 5 additions & 1 deletion src/metrics/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
use crate::util::errors::AppResult;
use crate::{app::App, db::DieselPool};
use prometheus::{
proto::MetricFamily, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
proto::MetricFamily, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
};

metrics! {
Expand All @@ -29,6 +29,8 @@ metrics! {
database_idle_conns: IntGaugeVec["pool"],
/// Number of used database connections in the pool
database_used_conns: IntGaugeVec["pool"],
/// Amount of time required to obtain a database connection
pub database_time_to_obtain_connection: HistogramVec["pool"],

/// Number of requests processed by this instance
pub requests_total: IntCounter,
Expand All @@ -44,6 +46,8 @@ metrics! {
pub downloads_unconditional_redirects_total: IntCounter,
/// Number of download requests with a non-canonical crate name.
pub downloads_non_canonical_crate_name_total: IntCounter,
/// How long it takes to execute the SELECT query in the download endpoint.
pub downloads_select_query_execution_time: Histogram,
/// Number of download requests that are not counted yet.
downloads_not_counted_total: IntGauge,
}
Expand Down
44 changes: 1 addition & 43 deletions src/middleware/log_request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,8 @@ use conduit::{header, Host, RequestExt, Scheme, StatusCode};
use conduit_cookie::RequestSession;
use sentry::Level;

use std::cell::RefCell;
use std::collections::HashMap;
use std::fmt::{self, Display, Formatter};
use std::rc::Rc;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use std::time::{SystemTime, UNIX_EPOCH};

const SLOW_REQUEST_THRESHOLD_MS: u64 = 1000;

Expand Down Expand Up @@ -94,34 +91,6 @@ pub fn add_custom_metadata<V: Display>(req: &mut dyn RequestExt, key: &'static s
}
}

#[derive(Debug, Clone)]
pub struct TimingRecorder {
sections: Rc<RefCell<HashMap<&'static str, Duration>>>,
}

impl TimingRecorder {
pub fn new() -> Self {
Self {
sections: Rc::new(RefCell::new(HashMap::new())),
}
}

pub fn record<R>(&self, name: &'static str, f: impl FnOnce() -> R) -> R {
let start = Instant::now();
let res = f();
self.sections
.borrow_mut()
.insert(name, Instant::now() - start);
res
}
}

impl Default for TimingRecorder {
fn default() -> Self {
Self::new()
}
}

fn report_to_sentry(req: &dyn RequestExt, res: &AfterResult, response_time: u64) {
let (message, level) = match res {
Err(e) => (e.to_string(), Level::Error),
Expand Down Expand Up @@ -255,17 +224,6 @@ impl Display for RequestLine<'_> {

if self.response_time > SLOW_REQUEST_THRESHOLD_MS {
line.add_marker("SLOW REQUEST")?;

if let Some(timings) = self.req.extensions().find::<TimingRecorder>() {
for (section, duration) in timings.sections.borrow().iter() {
line.add_quoted_field(
format!("timing_{}", section),
// Debug formatting rounds the duration to the most useful unit and adds
// the unit suffix. For example: 1.20s, 10.00ms, 8.35ns
format!("{:.2?}", duration),
)?;
}
}
}

Ok(())
Expand Down