Skip to content

Commit 3d610b4

Browse files
Implement DepGraph::read_index fast path via SSE2.
1 parent a40d83e commit 3d610b4

File tree

4 files changed

+103
-48
lines changed

4 files changed

+103
-48
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2063,6 +2063,7 @@ dependencies = [
20632063
"backtrace 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
20642064
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
20652065
"byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
2066+
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
20662067
"chalk-engine 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
20672068
"flate2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
20682069
"fmt_macros 0.0.0",

src/librustc/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ crate-type = ["dylib"]
1111
[dependencies]
1212
arena = { path = "../libarena" }
1313
bitflags = "1.0"
14+
cfg-if = "0.1.2"
1415
fmt_macros = { path = "../libfmt_macros" }
1516
graphviz = { path = "../libgraphviz" }
1617
jobserver = "0.1"

src/librustc/dep_graph/graph.rs

Lines changed: 98 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use smallvec::SmallVec;
1616
use rustc_data_structures::sync::{Lrc, Lock};
1717
use std::env;
1818
use std::hash::Hash;
19+
use std::mem;
1920
use ty::{self, TyCtxt};
2021
use util::common::{ProfileQueriesMsg, profq_msg};
2122

@@ -208,8 +209,7 @@ impl DepGraph {
208209
self.with_task_impl(key, cx, arg, false, task,
209210
|key| OpenTask::Regular(Lock::new(RegularOpenTask {
210211
node: key,
211-
reads: SmallVec::new(),
212-
read_set: Default::default(),
212+
read_set: OrderedDepIndexSet::new(),
213213
})),
214214
|data, key, task| data.borrow_mut().complete_task(key, task))
215215
}
@@ -352,8 +352,7 @@ impl DepGraph {
352352
if let Some(ref data) = self.data {
353353
let (result, open_task) = ty::tls::with_context(|icx| {
354354
let task = OpenTask::Anon(Lock::new(AnonOpenTask {
355-
reads: SmallVec::new(),
356-
read_set: Default::default(),
355+
read_set: OrderedDepIndexSet::new(),
357356
}));
358357

359358
let r = {
@@ -949,8 +948,7 @@ impl CurrentDepGraph {
949948
if let OpenTask::Regular(task) = task {
950949
let RegularOpenTask {
951950
node,
952-
read_set: _,
953-
reads
951+
read_set,
954952
} = task.into_inner();
955953
assert_eq!(node, key);
956954

@@ -961,22 +959,22 @@ impl CurrentDepGraph {
961959
// when called for LOCAL_CRATE) or they depend on a CrateMetadata
962960
// node.
963961
if cfg!(debug_assertions) {
964-
if node.kind.is_input() && reads.len() > 0 &&
962+
if node.kind.is_input() && read_set.reads.len() > 0 &&
965963
// FIXME(mw): Special case for DefSpan until Spans are handled
966964
// better in general.
967965
node.kind != DepKind::DefSpan &&
968-
reads.iter().any(|&i| {
966+
read_set.reads.iter().any(|&i| {
969967
!(self.nodes[i].kind == DepKind::CrateMetadata ||
970968
self.nodes[i].kind == DepKind::Krate)
971969
})
972970
{
973971
bug!("Input node {:?} with unexpected reads: {:?}",
974972
node,
975-
reads.iter().map(|&i| self.nodes[i]).collect::<Vec<_>>())
973+
read_set.reads.iter().map(|&i| self.nodes[i]).collect::<Vec<_>>())
976974
}
977975
}
978976

979-
self.alloc_node(node, reads)
977+
self.alloc_node(node, read_set.reads)
980978
} else {
981979
bug!("complete_task() - Expected regular task to be popped")
982980
}
@@ -985,18 +983,17 @@ impl CurrentDepGraph {
985983
fn pop_anon_task(&mut self, kind: DepKind, task: OpenTask) -> DepNodeIndex {
986984
if let OpenTask::Anon(task) = task {
987985
let AnonOpenTask {
988-
read_set: _,
989-
reads
986+
read_set,
990987
} = task.into_inner();
991988
debug_assert!(!kind.is_input());
992989

993990
let mut fingerprint = self.anon_id_seed;
994991
let mut hasher = StableHasher::new();
995992

996-
for &read in reads.iter() {
993+
for &read in read_set.reads.iter() {
997994
let read_dep_node = self.nodes[read];
998995

999-
::std::mem::discriminant(&read_dep_node.kind).hash(&mut hasher);
996+
mem::discriminant(&read_dep_node.kind).hash(&mut hasher);
1000997

1001998
// Fingerprint::combine() is faster than sending Fingerprint
1002999
// through the StableHasher (at least as long as StableHasher
@@ -1014,7 +1011,7 @@ impl CurrentDepGraph {
10141011
if let Some(&index) = self.node_to_node_index.get(&target_dep_node) {
10151012
index
10161013
} else {
1017-
self.alloc_node(target_dep_node, reads)
1014+
self.alloc_node(target_dep_node, read_set.reads)
10181015
}
10191016
} else {
10201017
bug!("pop_anon_task() - Expected anonymous task to be popped")
@@ -1039,35 +1036,12 @@ impl CurrentDepGraph {
10391036
match *icx.task {
10401037
OpenTask::Regular(ref task) => {
10411038
let mut task = task.lock();
1042-
let RegularOpenTask {
1043-
ref mut reads,
1044-
ref mut read_set,
1045-
ref node,
1046-
} = *task;
10471039
self.total_read_count += 1;
10481040

1049-
let is_new_entry = if reads.spilled() {
1050-
read_set.insert(source)
1051-
} else {
1052-
if reads.as_slice().contains(&source) {
1053-
false
1054-
} else {
1055-
if reads.inline_size() == reads.len() {
1056-
read_set.reserve(16);
1057-
read_set.extend(reads.iter().cloned());
1058-
read_set.insert(source);
1059-
}
1060-
true
1061-
}
1062-
};
1063-
1064-
if is_new_entry {
1065-
reads.push(source);
1066-
debug_assert!(read_set.is_empty() ^ reads.spilled());
1067-
1041+
if task.read_set.insert(source) {
10681042
if cfg!(debug_assertions) {
10691043
if let Some(ref forbidden_edge) = self.forbidden_edge {
1070-
let target = node;
1044+
let target = &task.node;
10711045
let source = self.nodes[source];
10721046
if forbidden_edge.test(&source, &target) {
10731047
bug!("forbidden edge {:?} -> {:?} created",
@@ -1081,10 +1055,7 @@ impl CurrentDepGraph {
10811055
}
10821056
}
10831057
OpenTask::Anon(ref task) => {
1084-
let mut task = task.lock();
1085-
if task.read_set.insert(source) {
1086-
task.reads.push(source);
1087-
}
1058+
task.lock().read_set.insert(source);
10881059
}
10891060
OpenTask::Ignore | OpenTask::EvalAlways { .. } => {
10901061
// ignore
@@ -1110,13 +1081,11 @@ impl CurrentDepGraph {
11101081

11111082
pub struct RegularOpenTask {
11121083
node: DepNode,
1113-
reads: SmallVec<[DepNodeIndex; 8]>,
1114-
read_set: FxHashSet<DepNodeIndex>,
1084+
read_set: OrderedDepIndexSet,
11151085
}
11161086

11171087
pub struct AnonOpenTask {
1118-
reads: SmallVec<[DepNodeIndex; 8]>,
1119-
read_set: FxHashSet<DepNodeIndex>,
1088+
read_set: OrderedDepIndexSet,
11201089
}
11211090

11221091
pub enum OpenTask {
@@ -1128,6 +1097,87 @@ pub enum OpenTask {
11281097
},
11291098
}
11301099

1100+
/// An insertion-ordered set of `DepNodeIndex` values.
///
/// `insert` appends to `reads` only when the index was not already present, so
/// `reads` holds each distinct index once, in first-insertion order. Callers in
/// this file consume `reads` directly once a task is finished.
struct OrderedDepIndexSet {
1101+
// The distinct indices, in insertion order (inline buffer of eight entries).
reads: SmallVec<[DepNodeIndex; 8]>,
1102+
// Hash set used for duplicate detection. The SSE2 `insert` only fills it once
// `reads` grows past its inline size; the fallback `insert` fills it on every
// successful insertion.
read_set: FxHashSet<DepNodeIndex>,
1103+
}
1104+
1105+
impl OrderedDepIndexSet {
1106+
/// Creates an empty set.
fn new() -> OrderedDepIndexSet {
1107+
OrderedDepIndexSet {
1108+
// Length is 0, but all eight inline slots are pre-filled with
// `DepNodeIndex::INVALID`. The SSE2 `insert` compares the new index against
// all eight slots regardless of the current length, so unused slots must
// hold a value that is never inserted (`insert` debug-asserts that the
// inserted index is not `INVALID`).
reads: SmallVec::from_buf_and_len([DepNodeIndex::INVALID; 8], 0),
1109+
read_set: Default::default(),
1110+
}
1111+
}
1112+
}
1113+
1114+
// Selects one of two `OrderedDepIndexSet::insert` implementations at compile
// time: an SSE2-accelerated one for x86/x86_64 builds that have SSE2 enabled,
// and a portable hash-set based one for everything else. Both return `true`
// when the index was newly added and `false` when it was already present.
cfg_if! {
1115+
// `not(stage0)` matches the condition under which lib.rs enables
// `feature(stdsimd)` in this commit.
if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
1116+
target_feature = "sse2",
1117+
not(stage0)))] {
1118+
impl OrderedDepIndexSet {
1119+
1120+
/// Adds `dep_node_index` to the set; returns `true` if it was not
/// already present.
#[inline(always)]
1121+
fn insert(&mut self, dep_node_index: DepNodeIndex) -> bool {
1122+
// SAFETY: this impl is only compiled when `target_feature = "sse2"`
// holds (see the `cfg` above), so the SSE2 instructions used by
// `insert_impl` are available.
unsafe {
1123+
self.insert_impl(dep_node_index)
1124+
}
1125+
}
1126+
1127+
/// SSE2 implementation of `insert`.
///
/// # Safety
///
/// Must only be called on a CPU that supports SSE2.
#[target_feature(enable = "sse2")]
1128+
unsafe fn insert_impl(&mut self, dep_node_index: DepNodeIndex) -> bool {
1129+
#[cfg(target_arch = "x86")]
1130+
use std::arch::x86::*;
1131+
#[cfg(target_arch = "x86_64")]
1132+
use std::arch::x86_64::*;
1133+
1134+
// Fast path: at most eight entries so far. The entries are expected to
// still live in the eight-slot inline buffer here (the capacity
// assertion below checks this in debug builds).
if self.reads.len() <= self.reads.inline_size() {
1135+
// `INVALID` is what `new()` puts into unused slots, so inserting it
// would be reported as "already contained" by the comparison below.
debug_assert!(dep_node_index != DepNodeIndex::INVALID);
1136+
// Eight 4-byte indices are exactly the 32 bytes read by the two
// 128-bit loads below.
debug_assert!(mem::size_of::<DepNodeIndex>() == 4);
1137+
debug_assert!(self.reads.capacity() == 8);
1138+
1139+
// NOTE(review): the loads read all eight slots, i.e. possibly past
// `as_slice().len()`; this relies on the buffer being the full
// inline array initialised in `new()`.
let ptr = self.reads.as_slice().as_ptr() as *const __m128i;
1140+
// Unaligned loads of slots 0..4 and 4..8.
let data1 = _mm_loadu_si128(ptr);
1141+
let data2 = _mm_loadu_si128(ptr.offset(1));
1142+
// Broadcast the candidate index into all four 32-bit lanes.
let cmp = _mm_set1_epi32(dep_node_index.as_u32() as i32);
1143+
1144+
// A non-zero mask means at least one of the eight slots equals the
// candidate.
if (_mm_movemask_epi8(_mm_cmpeq_epi32(cmp, data1)) |
1145+
_mm_movemask_epi8(_mm_cmpeq_epi32(cmp, data2))) != 0 {
1146+
// Already contained
1147+
false
1148+
} else {
1149+
self.reads.push(dep_node_index);
1150+
1151+
// This push took `reads` past its inline size: seed the hash set
// with every entry so the slow path below can take over.
if self.reads.len() > self.reads.inline_size() {
1152+
self.read_set.extend(self.reads.iter().cloned());
1153+
}
1154+
true
1155+
}
1156+
} else {
1157+
// Slow path: more than eight entries, deduplicate via the hash set.
if self.read_set.insert(dep_node_index) {
1158+
self.reads.push(dep_node_index);
1159+
true
1160+
} else {
1161+
false
1162+
}
1163+
}
1164+
}
1165+
}
1166+
} else {
1167+
impl OrderedDepIndexSet {
1168+
/// Adds `dep_node_index` to the set; returns `true` if it was not
/// already present. Portable variant: every call goes through the
/// hash set, and `reads` records the insertion order.
#[inline(always)]
1169+
fn insert(&mut self, dep_node_index: DepNodeIndex) -> bool {
1170+
if self.read_set.insert(dep_node_index) {
1171+
self.reads.push(dep_node_index);
1172+
true
1173+
} else {
1174+
false
1175+
}
1176+
}
1177+
}
1178+
}
1179+
}
1180+
11311181
// A data structure that stores Option<DepNodeColor> values as a contiguous
11321182
// array, using one u32 per entry.
11331183
struct DepNodeColorMap {

src/librustc/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#![feature(in_band_lifetimes)]
7070
#![feature(crate_visibility_modifier)]
7171
#![feature(transpose_result)]
72+
#![cfg_attr(not(stage0), feature(stdsimd))]
7273

7374
#![recursion_limit="512"]
7475

@@ -108,6 +109,8 @@ extern crate backtrace;
108109

109110
#[macro_use]
110111
extern crate smallvec;
112+
#[macro_use]
113+
extern crate cfg_if;
111114

112115
// Note that librustc doesn't actually depend on these crates, see the note in
113116
// `Cargo.toml` for this crate about why these are here.

0 commit comments

Comments
 (0)