Skip to content

Get rid of double indirection in string interner #37132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 15, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion src/liballoc/rc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,14 @@ use core::hash::{Hash, Hasher};
use core::intrinsics::{abort, assume};
use core::marker;
use core::marker::Unsize;
use core::mem::{self, align_of_val, forget, size_of_val, uninitialized};
use core::mem::{self, align_of_val, forget, size_of, size_of_val, uninitialized};
use core::ops::Deref;
use core::ops::CoerceUnsized;
use core::ptr::{self, Shared};
use core::convert::From;

use heap::deallocate;
use raw_vec::RawVec;

struct RcBox<T: ?Sized> {
strong: Cell<usize>,
Expand Down Expand Up @@ -365,6 +366,31 @@ impl<T> Rc<T> {
}
}

impl Rc<str> {
/// Constructs a new `Rc<str>` from a string slice.
///
/// The bytes of `value` are copied into a single freshly allocated buffer
/// that also holds the two reference counts, so the returned `Rc<str>`
/// refers to the string data directly.
///
/// This is a hidden, unstable helper (feature `rustc_private`); the
/// leading double underscore marks it as not intended for general use.
///
/// # Panics
///
/// Panics if the hand-computed allocation size does not equal
/// `size_of_val` of the resulting `RcBox<str>` (see the `assert!` below).
#[doc(hidden)]
#[unstable(feature = "rustc_private",
reason = "for internal use in rustc",
issue = "0")]
pub fn __from_str(value: &str) -> Rc<str> {
unsafe {
// Allocate enough space for `RcBox<str>`.
// The length is counted in `usize` words: two words for the `strong`
// and `weak` counters, plus the string length rounded up to a whole
// number of words.
let aligned_len = 2 + (value.len() + size_of::<usize>() - 1) / size_of::<usize>();
let vec = RawVec::<usize>::with_capacity(aligned_len);
let ptr = vec.ptr();
// `forget` consumes `vec` without running its destructor; from here on
// the buffer is reachable only through `ptr` and ends up owned by the
// `Rc` returned below.
forget(vec);
// Initialize fields of `RcBox<str>`.
*ptr.offset(0) = 1; // strong: Cell::new(1)
*ptr.offset(1) = 1; // weak: Cell::new(1)
// The string payload starts right after the two counter words.
ptr::copy_nonoverlapping(value.as_ptr(), ptr.offset(2) as *mut u8, value.len());
// Combine the allocation address and the string length into a fat pointer to `RcBox`.
// NOTE(review): this assumes a `*mut RcBox<str>` is represented as the pair
// `[data address, length]`, in that order — confirm against the compiler's
// fat-pointer layout.
let rcbox_ptr: *mut RcBox<str> = mem::transmute([ptr as usize, value.len()]);
// Sanity check: the size computed by hand above must agree with the size
// the compiler reports for the value behind the fat pointer.
assert!(aligned_len * size_of::<usize>() == size_of_val(&*rcbox_ptr));
Rc { ptr: Shared::new(rcbox_ptr) }
}
}
}

impl<T: ?Sized> Rc<T> {
/// Creates a new [`Weak`][weak] pointer to this value.
///
Expand Down
6 changes: 3 additions & 3 deletions src/librustc_trans/debuginfo/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ impl<'tcx> TypeMap<'tcx> {

// Get the string representation of a UniqueTypeId. This method will fail if
// the id is unknown.
fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> Rc<String> {
fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> Rc<str> {
let UniqueTypeId(interner_key) = unique_type_id;
self.unique_id_interner.get(interner_key)
}
Expand Down Expand Up @@ -299,7 +299,7 @@ impl<'tcx> TypeMap<'tcx> {
// Trim to size before storing permanently
unique_type_id.shrink_to_fit();

let key = self.unique_id_interner.intern(unique_type_id);
let key = self.unique_id_interner.intern(&unique_type_id);
self.type_to_unique_id.insert(type_, UniqueTypeId(key));

return UniqueTypeId(key);
Expand Down Expand Up @@ -367,7 +367,7 @@ impl<'tcx> TypeMap<'tcx> {
let enum_variant_type_id = format!("{}::{}",
&self.get_unique_type_id_as_string(enum_type_id),
variant_name);
let interner_key = self.unique_id_interner.intern(enum_variant_type_id);
let interner_key = self.unique_id_interner.intern(&enum_variant_type_id);
UniqueTypeId(interner_key)
}
}
Expand Down
13 changes: 3 additions & 10 deletions src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,27 +478,20 @@ pub fn clear_ident_interner() {
/// somehow.
#[derive(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)]
pub struct InternedString {
string: Rc<String>,
string: Rc<str>,
}

impl InternedString {
#[inline]
pub fn new(string: &'static str) -> InternedString {
InternedString {
string: Rc::new(string.to_owned()),
}
}

#[inline]
fn new_from_rc_str(string: Rc<String>) -> InternedString {
InternedString {
string: string,
string: Rc::__from_str(string),
}
}

#[inline]
pub fn new_from_name(name: ast::Name) -> InternedString {
with_ident_interner(|interner| InternedString::new_from_rc_str(interner.get(name)))
with_ident_interner(|interner| InternedString { string: interner.get(name) })
}
}

Expand Down
40 changes: 15 additions & 25 deletions src/libsyntax/util/interner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,13 @@

use ast::Name;

use std::borrow::Borrow;
use std::collections::HashMap;
use std::rc::Rc;

#[derive(PartialEq, Eq, Hash)]
struct RcStr(Rc<String>);

impl Borrow<str> for RcStr {
fn borrow(&self) -> &str {
&self.0
}
}

#[derive(Default)]
pub struct Interner {
names: HashMap<RcStr, Name>,
strings: Vec<Rc<String>>,
names: HashMap<Rc<str>, Name>,
strings: Vec<Rc<str>>,
}

/// When traits can extend traits, we should extend index<Name,T> to get []
Expand All @@ -47,22 +37,22 @@ impl Interner {
this
}

pub fn intern<T: Borrow<str> + Into<String>>(&mut self, string: T) -> Name {
if let Some(&name) = self.names.get(string.borrow()) {
pub fn intern(&mut self, string: &str) -> Name {
if let Some(&name) = self.names.get(string) {
return name;
}

let name = Name(self.strings.len() as u32);
let string = Rc::new(string.into());
let string = Rc::__from_str(string);
self.strings.push(string.clone());
self.names.insert(RcStr(string), name);
self.names.insert(string, name);
name
}

pub fn gensym(&mut self, string: &str) -> Name {
let gensym = Name(self.strings.len() as u32);
// leave out of `names` to avoid colliding
self.strings.push(Rc::new(string.to_owned()));
self.strings.push(Rc::__from_str(string));
gensym
}

Expand All @@ -75,7 +65,7 @@ impl Interner {
gensym
}

pub fn get(&self, name: Name) -> Rc<String> {
pub fn get(&self, name: Name) -> Rc<str> {
self.strings[name.0 as usize].clone()
}

Expand Down Expand Up @@ -109,13 +99,13 @@ mod tests {
assert_eq!(i.gensym("dog"), Name(4));
// gensym tests again with gensym_copy:
assert_eq!(i.gensym_copy(Name(2)), Name(5));
assert_eq!(*i.get(Name(5)), "zebra");
assert_eq!(&*i.get(Name(5)), "zebra");
assert_eq!(i.gensym_copy(Name(2)), Name(6));
assert_eq!(*i.get(Name(6)), "zebra");
assert_eq!(*i.get(Name(0)), "dog");
assert_eq!(*i.get(Name(1)), "cat");
assert_eq!(*i.get(Name(2)), "zebra");
assert_eq!(*i.get(Name(3)), "zebra");
assert_eq!(*i.get(Name(4)), "dog");
assert_eq!(&*i.get(Name(6)), "zebra");
assert_eq!(&*i.get(Name(0)), "dog");
assert_eq!(&*i.get(Name(1)), "cat");
assert_eq!(&*i.get(Name(2)), "zebra");
assert_eq!(&*i.get(Name(3)), "zebra");
assert_eq!(&*i.get(Name(4)), "dog");
}
}