
Commit 438a6ba

Split query execution into hot and cold paths

1 parent 3776f4b · commit 438a6ba

File tree: 3 files changed (+158, -106 lines)

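The heart of the change: the query fast path (a cache probe) is kept small and inlinable, while the miss path moves behind an `#[inline(never)]` function boundary. Below is a rough, self-contained sketch of that hot/cold split pattern in plain Rust; the types and names are hypothetical stand-ins, not rustc's actual query machinery.

use std::collections::HashMap;
use std::sync::Mutex;

// Hypothetical cache; rustc's real caches are sharded and generic over queries.
struct QueryCache {
    map: Mutex<HashMap<u64, u64>>,
}

impl QueryCache {
    // Hot path: a single probe, small enough to inline into every caller.
    #[inline]
    fn get_query(&self, key: u64) -> u64 {
        if let Some(v) = self.map.lock().unwrap().get(&key).copied() {
            return v;
        }
        // Cold path taken only on a cache miss.
        self.execute_query(key)
    }

    // Cold path: kept out of line so the hot path stays compact.
    #[inline(never)]
    fn execute_query(&self, key: u64) -> u64 {
        let value = expensive_compute(key);
        self.map.lock().unwrap().insert(key, value);
        value
    }
}

// Stand-in for running a query's provider.
fn expensive_compute(key: u64) -> u64 {
    key.wrapping_mul(0x9E37_79B9_7F4A_7C15)
}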

src/librustc/dep_graph/graph.rs (1 addition, 0 deletions)
@@ -1122,6 +1122,7 @@ impl CurrentDepGraph {
 }

 impl DepGraphData {
+    #[inline]
     fn read_index(&self, source: DepNodeIndex) {
         ty::tls::with_context_opt(|icx| {
             let icx = if let Some(icx) = icx { icx } else { return };
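The only change in this file is the new `#[inline]` on `read_index`, which sits on the query hot path: for a non-generic function, the attribute is what allows inlining across crate boundaries. A trivial standalone illustration of the attribute (hypothetical function, not from rustc):

// Without `#[inline]`, a non-generic `pub fn` in another crate is normally
// compiled as an out-of-line call; with it, downstream crates may inline it.
#[inline]
pub fn is_even(n: u64) -> bool {
    n % 2 == 0
}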

src/librustc/ty/context.rs (1 addition, 0 deletions)
@@ -1684,6 +1684,7 @@ pub mod tls {

     /// Gets the pointer to the current `ImplicitCtxt`.
     #[cfg(not(parallel_compiler))]
+    #[inline]
     fn get_tlv() -> usize {
         TLV.with(|tlv| tlv.get())
     }
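`get_tlv` reads a pointer-sized value out of a thread-local slot, and the added `#[inline]` lets that read be inlined into the query code that calls it constantly. A self-contained sketch of the same pattern, assuming a `Cell<usize>` slot consistent with the `TLV.with(|tlv| tlv.get())` call above:

use std::cell::Cell;

thread_local! {
    // Pointer-sized slot holding the current context pointer; 0 means unset.
    static TLV: Cell<usize> = Cell::new(0);
}

#[inline]
fn get_tlv() -> usize {
    TLV.with(|tlv| tlv.get())
}

fn set_tlv(value: usize) {
    TLV.with(|tlv| tlv.set(value));
}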

src/librustc/ty/query/plumbing.rs (156 additions, 106 deletions)
@@ -12,10 +12,8 @@ use crate::ty::{self, TyCtxt};
 #[cfg(not(parallel_compiler))]
 use rustc_data_structures::cold_path;
 use rustc_data_structures::fx::{FxHashMap, FxHasher};
-#[cfg(parallel_compiler)]
-use rustc_data_structures::profiling::TimingGuard;
 use rustc_data_structures::sharded::Sharded;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sync::{Lock, LockGuard};
 use rustc_data_structures::thin_vec::ThinVec;
 use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level};
 use rustc_span::source_map::DUMMY_SP;
@@ -70,6 +68,12 @@ impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> {
     }
 }

+/// Values used when checking a query cache which can be reused on a cache-miss to execute the query.
+pub(super) struct QueryLookup<'tcx, Q: QueryDescription<'tcx>> {
+    shard: usize,
+    lock: LockGuard<'tcx, QueryCache<'tcx, Q>>,
+}
+
 /// A type representing the responsibility to execute the job in the `job` field.
 /// This will poison the relevant query if dropped.
 pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
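`QueryLookup` is what lets the cache check and the query start share a single lock acquisition: the fast path locks a shard, and on a miss it hands the still-held guard plus the shard index to the slow path instead of unlocking and re-locking. A minimal sketch of the idea using std types (rustc uses its own `Sharded` and `Lock` types, and the real keys and values are generic):

use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::{Mutex, MutexGuard};

// Analogue of `QueryLookup`: everything the slow path needs to resume
// exactly where the cache probe left off.
struct Lookup<'a> {
    shard: usize,
    lock: MutexGuard<'a, HashMap<u64, u64>>,
}

struct ShardedCache {
    shards: Vec<Mutex<HashMap<u64, u64>>>,
}

impl ShardedCache {
    // Probe the cache; on a miss the caller keeps `Lookup` and can insert
    // into the same shard without taking the lock a second time.
    fn try_get_cached(&self, key: u64) -> (Option<u64>, Lookup<'_>) {
        let mut h = std::collections::hash_map::DefaultHasher::new();
        key.hash(&mut h);
        let shard = (h.finish() as usize) % self.shards.len();
        let lock = self.shards[shard].lock().unwrap();
        let hit = lock.get(&key).copied();
        (hit, Lookup { shard, lock })
    }
}

On a miss the caller can, for example, run `lookup.lock.insert(key, value)` while still holding the same guard, mirroring how `try_start` below registers a `QueryJob` through `lookup.lock` before dropping it.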
@@ -81,119 +85,87 @@ pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
 impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// Either gets a `JobOwner` corresponding the query, allowing us to
     /// start executing the query, or returns with the result of the query.
-    /// If the query is executing elsewhere, this will wait for it.
+    /// This function assumes that `try_get_cached` is already called and returned `lookup`.
+    /// If the query is executing elsewhere, this will wait for it and return the result.
     /// If the query panicked, this will silently panic.
     ///
     /// This function is inlined because that results in a noticeable speed-up
     /// for some compile-time benchmarks.
     #[inline(always)]
-    pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
-        // Handling the `query_blocked_prof_timer` is a bit weird because of the
-        // control flow in this function: Blocking is implemented by
-        // awaiting a running job and, once that is done, entering the loop below
-        // again from the top. In that second iteration we will hit the
-        // cache which provides us with the information we need for
-        // finishing the "query-blocked" event.
-        //
-        // We thus allocate `query_blocked_prof_timer` outside the loop,
-        // initialize it during the first iteration and finish it during the
-        // second iteration.
-        #[cfg(parallel_compiler)]
-        let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
-
-        let cache = Q::query_cache(tcx);
-        loop {
-            // We compute the key's hash once and then use it for both the
-            // shard lookup and the hashmap lookup. This relies on the fact
-            // that both of them use `FxHasher`.
-            let mut state = FxHasher::default();
-            key.hash(&mut state);
-            let key_hash = state.finish();
-
-            let shard = cache.get_shard_index_by_hash(key_hash);
-            let mut lock_guard = cache.get_shard_by_index(shard).lock();
-            let lock = &mut *lock_guard;
-
-            if let Some((_, value)) =
-                lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
-            {
-                if unlikely!(tcx.prof.enabled()) {
-                    tcx.prof.query_cache_hit(value.index.into());
-
-                    #[cfg(parallel_compiler)]
-                    {
-                        if let Some(prof_timer) = query_blocked_prof_timer.take() {
-                            prof_timer.finish_with_query_invocation_id(value.index.into());
-                        }
-                    }
-                }
+    pub(super) fn try_start(
+        tcx: TyCtxt<'tcx>,
+        span: Span,
+        key: &Q::Key,
+        mut lookup: QueryLookup<'tcx, Q>,
+    ) -> TryGetJob<'a, 'tcx, Q> {
+        let lock = &mut *lookup.lock;
+
+        let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) {
+            Entry::Occupied(mut entry) => {
+                match entry.get_mut() {
+                    QueryResult::Started(job) => {
+                        // For parallel queries, we'll block and wait until the query running
+                        // in another thread has completed. Record how long we wait in the
+                        // self-profiler.
+                        let _query_blocked_prof_timer = if cfg!(parallel_compiler) {
+                            Some(tcx.prof.query_blocked())
+                        } else {
+                            None
+                        };
+
+                        // Create the id of the job we're waiting for
+                        let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind());

-                let result = (value.value.clone(), value.index);
-                #[cfg(debug_assertions)]
-                {
-                    lock.cache_hits += 1;
+                        (job.latch(id), _query_blocked_prof_timer)
+                    }
+                    QueryResult::Poisoned => FatalError.raise(),
                 }
-                return TryGetJob::JobCompleted(result);
             }
+            Entry::Vacant(entry) => {
+                // No job entry for this query. Return a new one to be started later.

-            let latch = match lock.active.entry((*key).clone()) {
-                Entry::Occupied(mut entry) => {
-                    match entry.get_mut() {
-                        QueryResult::Started(job) => {
-                            // For parallel queries, we'll block and wait until the query running
-                            // in another thread has completed. Record how long we wait in the
-                            // self-profiler.
-                            #[cfg(parallel_compiler)]
-                            {
-                                query_blocked_prof_timer = Some(tcx.prof.query_blocked());
-                            }
-
-                            // Create the id of the job we're waiting for
-                            let id = QueryJobId::new(job.id, shard, Q::dep_kind());
+                // Generate an id unique within this shard.
+                let id = lock.jobs.checked_add(1).unwrap();
+                lock.jobs = id;
+                let id = QueryShardJobId(NonZeroU32::new(id).unwrap());

-                            job.latch(id)
-                        }
-                        QueryResult::Poisoned => FatalError.raise(),
-                    }
-                }
-                Entry::Vacant(entry) => {
-                    // No job entry for this query. Return a new one to be started later.
+                let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind());

-                    // Generate an id unique within this shard.
-                    let id = lock.jobs.checked_add(1).unwrap();
-                    lock.jobs = id;
-                    let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
+                let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));

-                    let global_id = QueryJobId::new(id, shard, Q::dep_kind());
+                entry.insert(QueryResult::Started(job));

-                    let job =
-                        tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
+                let owner =
+                    JobOwner { cache: Q::query_cache(tcx), id: global_id, key: (*key).clone() };
+                return TryGetJob::NotYetStarted(owner);
+            }
+        };
+        mem::drop(lookup.lock);

-                    entry.insert(QueryResult::Started(job));
+        // If we are single-threaded we know that we have cycle error,
+        // so we just return the error.
+        #[cfg(not(parallel_compiler))]
+        return TryGetJob::Cycle(cold_path(|| {
+            Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
+        }));

-                    let owner = JobOwner { cache, id: global_id, key: (*key).clone() };
-                    return TryGetJob::NotYetStarted(owner);
-                }
-            };
-            mem::drop(lock_guard);
+        // With parallel queries we might just have to wait on some other
+        // thread.
+        #[cfg(parallel_compiler)]
+        {
+            let result = latch.wait_on(tcx, span);

-            // If we are single-threaded we know that we have cycle error,
-            // so we just return the error.
-            #[cfg(not(parallel_compiler))]
-            return TryGetJob::Cycle(cold_path(|| {
-                Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
-            }));
+            if let Err(cycle) = result {
+                return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
+            }

-            // With parallel queries we might just have to wait on some other
-            // thread.
-            #[cfg(parallel_compiler)]
-            {
-                let result = latch.wait_on(tcx, span);
+            let cached = tcx.try_get_cached::<Q>(key).0.unwrap();

-                if let Err(cycle) = result {
-                    return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
-                }
+            if let Some(prof_timer) = _query_blocked_prof_timer.take() {
+                prof_timer.finish_with_query_invocation_id(cached.1.into());
             }
+
+            return TryGetJob::JobCompleted(cached);
         }
     }

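One detail of the new `try_start`: the old code gated the profiler timer behind `#[cfg(parallel_compiler)]` blocks, while the rewrite uses `cfg!(parallel_compiler)` in an ordinary `if`/`else`. `cfg!` expands to a boolean constant, so both branches are type-checked under every configuration, which is what lets the timer handling live in a single expression. A tiny illustration using the always-available `debug_assertions` flag (`parallel_compiler` is a rustc-internal cfg):

// `#[cfg(...)]` removes code from compilation entirely;
// `cfg!(...)` is just a constant `bool`, so both arms must compile.
fn maybe_label() -> Option<&'static str> {
    if cfg!(debug_assertions) { Some("debug build") } else { None }
}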
@@ -269,6 +241,7 @@ pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> {
     /// The query was already completed.
     /// Returns the result of the query and its dep-node index
     /// if it succeeded or a cycle error if it failed.
+    #[cfg(parallel_compiler)]
     JobCompleted((D::Value, DepNodeIndex)),

     /// Trying to execute the query resulted in a cycle.
@@ -396,13 +369,76 @@ impl<'tcx> TyCtxt<'tcx> {
         eprintln!("end of query stack");
     }

-    #[inline(never)]
-    pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key) -> Q::Value {
+    /// Checks if the query is already computed and in the cache.
+    /// It returns the shard index and a lock guard to the shard,
+    /// which will be used if the query is not in the cache and we need
+    /// to compute it.
+    #[inline]
+    fn try_get_cached<Q: QueryDescription<'tcx>>(
+        self,
+        key: &Q::Key,
+    ) -> (Option<(Q::Value, DepNodeIndex)>, QueryLookup<'tcx, Q>) {
+        let cache = Q::query_cache(self);
+
+        // We compute the key's hash once and then use it for both the
+        // shard lookup and the hashmap lookup. This relies on the fact
+        // that both of them use `FxHasher`.
+        let mut state = FxHasher::default();
+        key.hash(&mut state);
+        let key_hash = state.finish();
+
+        let shard = cache.get_shard_index_by_hash(key_hash);
+        let mut lock_guard = cache.get_shard_by_index(shard).lock();
+        let lock = &mut *lock_guard;
+
+        let result =
+            lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key).map(|(_, value)| {
+                if unlikely!(self.prof.enabled()) {
+                    self.prof.query_cache_hit(value.index.into());
+                }
+
+                (value.value.clone(), value.index)
+            });
+
+        #[cfg(debug_assertions)]
+        {
+            if result.is_some() {
+                lock.cache_hits += 1;
+            }
+        }
+
+        (result, QueryLookup { lock: lock_guard, shard })
+    }
+
+    #[inline]
+    pub(super) fn get_query<Q: QueryDescription<'tcx> + 'tcx>(
+        self,
+        span: Span,
+        key: Q::Key,
+    ) -> Q::Value {
         debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span);

-        let job = match JobOwner::try_get(self, span, &key) {
+        let (cached, lookup) = self.try_get_cached::<Q>(&key);
+
+        if let Some((v, index)) = cached {
+            self.dep_graph.read_index(index);
+            return v;
+        }
+
+        self.try_execute_query(span, key, lookup)
+    }
+
+    #[inline(never)]
+    pub(super) fn try_execute_query<Q: QueryDescription<'tcx>>(
+        self,
+        span: Span,
+        key: Q::Key,
+        lookup: QueryLookup<'tcx, Q>,
+    ) -> Q::Value {
+        let job = match JobOwner::try_start(self, span, &key, lookup) {
             TryGetJob::NotYetStarted(job) => job,
             TryGetJob::Cycle(result) => return result,
+            #[cfg(parallel_compiler)]
             TryGetJob::JobCompleted((v, index)) => {
                 self.dep_graph.read_index(index);
                 return v;
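`try_get_cached` keeps the old fast path's trick of hashing the key once and using the result twice: once to pick the shard, and again for the map probe via `raw_entry().from_key_hashed_nocheck`, which works because shard selection and the map itself both hash with `FxHasher`. The raw-entry half of the trick needs a raw-entry API (std's is nightly-only), so this sketch reuses the hash only for shard selection:

use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

const SHARDS: usize = 32;

struct Sharded {
    shards: [Mutex<HashMap<u64, u64>>; SHARDS],
}

impl Sharded {
    fn new() -> Self {
        Sharded { shards: std::array::from_fn(|_| Mutex::new(HashMap::new())) }
    }

    fn get(&self, key: u64) -> Option<u64> {
        // Hash once...
        let mut h = std::collections::hash_map::DefaultHasher::new();
        key.hash(&mut h);
        let hash = h.finish();
        // ...then reuse the hash. rustc additionally feeds the same hash to
        // `from_key_hashed_nocheck` so the map does not re-hash the key.
        let shard = (hash as usize) % SHARDS;
        self.shards[shard].lock().unwrap().get(&key).copied()
    }
}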
@@ -615,7 +651,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// side-effects -- e.g., in order to report errors for erroneous programs.
     ///
     /// Note: The optimization is only available during incr. comp.
-    pub(super) fn ensure_query<Q: QueryDescription<'tcx>>(self, key: Q::Key) -> () {
+    pub(super) fn ensure_query<Q: QueryDescription<'tcx> + 'tcx>(self, key: Q::Key) -> () {
         if Q::EVAL_ALWAYS {
             let _ = self.get_query::<Q>(DUMMY_SP, key);
             return;
@@ -643,12 +679,26 @@ impl<'tcx> TyCtxt<'tcx> {
     }

     #[allow(dead_code)]
-    fn force_query<Q: QueryDescription<'tcx>>(self, key: Q::Key, span: Span, dep_node: DepNode) {
+    fn force_query<Q: QueryDescription<'tcx> + 'tcx>(
+        self,
+        key: Q::Key,
+        span: Span,
+        dep_node: DepNode,
+    ) {
         // We may be concurrently trying both execute and force a query.
         // Ensure that only one of them runs the query.
-        let job = match JobOwner::try_get(self, span, &key) {
+
+        let (cached, lookup) = self.try_get_cached::<Q>(&key);
+
+        if cached.is_some() {
+            return;
+        }
+
+        let job = match JobOwner::try_start(self, span, &key, lookup) {
             TryGetJob::NotYetStarted(job) => job,
-            TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return,
+            TryGetJob::Cycle(_) => return,
+            #[cfg(parallel_compiler)]
+            TryGetJob::JobCompleted(_) => return,
         };
         self.force_query_with_job::<Q>(key, job, dep_node);
     }
@@ -1065,7 +1115,7 @@ macro_rules! define_queries_inner {
         }

         $($(#[$attr])*
-        #[inline(always)]
+        #[inline]
         pub fn $name(self, key: $K) -> $V {
             self.at(DUMMY_SP).$name(key)
         })*
@@ -1102,7 +1152,7 @@ macro_rules! define_queries_inner {

         impl TyCtxtAt<$tcx> {
             $($(#[$attr])*
-            #[inline(always)]
+            #[inline]
             pub fn $name(self, key: $K) -> $V {
                 self.tcx.get_query::<queries::$name<'_>>(self.span, key)
             })*
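A note on the two macro hunks above: the generated per-query wrappers drop `#[inline(always)]` in favor of `#[inline]`. With the miss path now isolated in the `#[inline(never)]` `try_execute_query`, these wrappers only forward into the small `get_query` hot path, so an ordinary inlining hint is enough; forcing inlining at every query call site would mostly duplicate a call into the cold path.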
