Skip to content

Commit 1c50dca

Browse files
authored
Merge pull request #23 from eddyb/master
Support demangling the new Rust mangling scheme.
2 parents 8eacfc9 + 843e551 commit 1c50dca

File tree

3 files changed

+1467
-178
lines changed

3 files changed

+1467
-178
lines changed

src/legacy.rs

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
use core::fmt;
2+
3+
/// A legacy-mangled symbol that has passed validation and can be printed in
/// its demangled form.
pub struct Demangle<'a> {
    /// The mangled text with the leading prefix and the trailing `E` removed,
    /// i.e. the run of length-prefixed path elements.
    inner: &'a str,
    /// How many `::`-separated path elements `inner` contains.
    elements: usize,
}
9+
10+
/// De-mangles a Rust symbol into a more readable version
11+
///
12+
/// All Rust symbols by default are mangled as they contain characters that
13+
/// cannot be represented in all object files. The mangling mechanism is similar
14+
/// to C++'s, but Rust has a few specifics to handle items like lifetimes in
15+
/// symbols.
16+
///
17+
/// This function will take a **mangled** symbol and return a value. When printed,
18+
/// the de-mangled version will be written. If the symbol does not look like
19+
/// a mangled symbol, the original value will be written instead.
20+
///
21+
/// # Examples
22+
///
23+
/// ```
24+
/// use rustc_demangle::demangle;
25+
///
26+
/// assert_eq!(demangle("_ZN4testE").to_string(), "test");
27+
/// assert_eq!(demangle("_ZN3foo3barE").to_string(), "foo::bar");
28+
/// assert_eq!(demangle("foo").to_string(), "foo");
29+
/// ```
30+
31+
// All Rust symbols are in theory lists of "::"-separated identifiers. Some
32+
// assemblers, however, can't handle these characters in symbol names. To get
33+
// around this, we use C++-style mangling. The mangling method is:
34+
//
35+
// 1. Prefix the symbol with "_ZN"
36+
// 2. For each element of the path, emit the length plus the element
37+
// 3. End the path with "E"
38+
//
39+
// For example, "_ZN4testE" => "test" and "_ZN3foo3barE" => "foo::bar".
40+
//
41+
// We're the ones printing our backtraces, so we can't rely on anything else to
42+
// demangle our symbols. It's *much* nicer to look at demangled symbols, so
43+
// this function is implemented to give us nice pretty output.
44+
//
45+
// Note that this demangler isn't quite as fancy as it could be. We have lots
46+
// of other information in our symbols like hashes, version, type information,
47+
// etc. Additionally, this doesn't handle glue symbols at all.
48+
pub fn demangle(s: &str) -> Result<Demangle, ()> {
49+
// First validate the symbol. If it doesn't look like anything we're
50+
// expecting, we just print it literally. Note that we must handle non-Rust
51+
// symbols because we could have any function in the backtrace.
52+
let inner;
53+
if s.len() > 4 && s.starts_with("_ZN") && s.ends_with('E') {
54+
inner = &s[3..s.len() - 1];
55+
} else if s.len() > 3 && s.starts_with("ZN") && s.ends_with('E') {
56+
// On Windows, dbghelp strips leading underscores, so we accept "ZN...E"
57+
// form too.
58+
inner = &s[2..s.len() - 1];
59+
} else if s.len() > 5 && s.starts_with("__ZN") && s.ends_with('E') {
60+
// On OSX, symbols are prefixed with an extra _
61+
inner = &s[4..s.len() - 1];
62+
} else {
63+
return Err(());
64+
}
65+
66+
// only work with ascii text
67+
if inner.bytes().any(|c| c & 0x80 != 0) {
68+
return Err(());
69+
}
70+
71+
let mut elements = 0;
72+
let mut chars = inner.chars().peekable();
73+
loop {
74+
let mut i = 0usize;
75+
while let Some(&c) = chars.peek() {
76+
if !c.is_digit(10) {
77+
break
78+
}
79+
chars.next();
80+
let next = i.checked_mul(10)
81+
.and_then(|i| i.checked_add(c as usize - '0' as usize));
82+
i = match next {
83+
Some(i) => i,
84+
None => {
85+
return Err(());
86+
}
87+
};
88+
}
89+
90+
if i == 0 {
91+
if !chars.next().is_none() {
92+
return Err(());
93+
}
94+
break;
95+
} else if chars.by_ref().take(i).count() != i {
96+
return Err(());
97+
} else {
98+
elements += 1;
99+
}
100+
}
101+
102+
Ok(Demangle {
103+
inner: inner,
104+
elements: elements,
105+
})
106+
}
107+
108+
// A Rust hash element is the letter `h` followed by nothing but hex digits.
fn is_rust_hash(s: &str) -> bool {
    let mut symbols = s.chars();
    match symbols.next() {
        Some('h') => symbols.all(|digit| digit.is_digit(16)),
        _ => false,
    }
}
112+
113+
impl<'a> fmt::Display for Demangle<'a> {
114+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
115+
// Alright, let's do this.
116+
let mut inner = self.inner;
117+
for element in 0..self.elements {
118+
let mut rest = inner;
119+
while rest.chars().next().unwrap().is_digit(10) {
120+
rest = &rest[1..];
121+
}
122+
let i: usize = inner[..(inner.len() - rest.len())].parse().unwrap();
123+
inner = &rest[i..];
124+
rest = &rest[..i];
125+
// Skip printing the hash if alternate formatting
126+
// was requested.
127+
if f.alternate() && element+1 == self.elements && is_rust_hash(&rest) {
128+
break;
129+
}
130+
if element != 0 {
131+
try!(f.write_str("::"));
132+
}
133+
if rest.starts_with("_$") {
134+
rest = &rest[1..];
135+
}
136+
while !rest.is_empty() {
137+
if rest.starts_with('.') {
138+
if let Some('.') = rest[1..].chars().next() {
139+
try!(f.write_str("::"));
140+
rest = &rest[2..];
141+
} else {
142+
try!(f.write_str("."));
143+
rest = &rest[1..];
144+
}
145+
} else if rest.starts_with('$') {
146+
macro_rules! demangle {
147+
($($pat:expr => $demangled:expr,)*) => ({
148+
$(if rest.starts_with($pat) {
149+
try!(f.write_str($demangled));
150+
rest = &rest[$pat.len()..];
151+
} else)*
152+
{
153+
try!(f.write_str(rest));
154+
break;
155+
}
156+
157+
})
158+
}
159+
160+
// see src/librustc/back/link.rs for these mappings
161+
demangle! {
162+
"$SP$" => "@",
163+
"$BP$" => "*",
164+
"$RF$" => "&",
165+
"$LT$" => "<",
166+
"$GT$" => ">",
167+
"$LP$" => "(",
168+
"$RP$" => ")",
169+
"$C$" => ",",
170+
171+
// in theory we can demangle any Unicode code point, but
172+
// for simplicity we just catch the common ones.
173+
"$u7e$" => "~",
174+
"$u20$" => " ",
175+
"$u27$" => "'",
176+
"$u3d$" => "=",
177+
"$u5b$" => "[",
178+
"$u5d$" => "]",
179+
"$u7b$" => "{",
180+
"$u7d$" => "}",
181+
"$u3b$" => ";",
182+
"$u2b$" => "+",
183+
"$u21$" => "!",
184+
"$u22$" => "\"",
185+
}
186+
} else {
187+
let idx = match rest.char_indices().find(|&(_, c)| c == '$' || c == '.') {
188+
None => rest.len(),
189+
Some((i, _)) => i,
190+
};
191+
try!(f.write_str(&rest[..idx]));
192+
rest = &rest[idx..];
193+
}
194+
}
195+
}
196+
197+
Ok(())
198+
}
199+
}
200+
201+
#[cfg(test)]
202+
mod tests {
203+
use std::prelude::v1::*;
204+
205+
macro_rules! t {
206+
($a:expr, $b:expr) => (assert!(ok($a, $b)))
207+
}
208+
209+
macro_rules! t_err {
210+
($a:expr) => (assert!(ok_err($a)))
211+
}
212+
213+
macro_rules! t_nohash {
214+
($a:expr, $b:expr) => ({
215+
assert_eq!(format!("{:#}", ::demangle($a)), $b);
216+
})
217+
}
218+
219+
fn ok(sym: &str, expected: &str) -> bool {
220+
match ::try_demangle(sym) {
221+
Ok(s) => {
222+
if s.to_string() == expected {
223+
true
224+
} else {
225+
println!("\n{}\n!=\n{}\n", s, expected);
226+
false
227+
}
228+
}
229+
Err(_) => {
230+
println!("error demangling");
231+
false
232+
}
233+
}
234+
}
235+
236+
fn ok_err(sym: &str) -> bool {
237+
match ::try_demangle(sym) {
238+
Ok(_) => {
239+
println!("succeeded in demangling");
240+
false
241+
}
242+
Err(_) => ::demangle(sym).to_string() == sym,
243+
}
244+
}
245+
246+
#[test]
247+
fn demangle() {
248+
t_err!("test");
249+
t!("_ZN4testE", "test");
250+
t_err!("_ZN4test");
251+
t!("_ZN4test1a2bcE", "test::a::bc");
252+
}
253+
254+
#[test]
255+
fn demangle_dollars() {
256+
t!("_ZN4$RP$E", ")");
257+
t!("_ZN8$RF$testE", "&test");
258+
t!("_ZN8$BP$test4foobE", "*test::foob");
259+
t!("_ZN9$u20$test4foobE", " test::foob");
260+
t!("_ZN35Bar$LT$$u5b$u32$u3b$$u20$4$u5d$$GT$E", "Bar<[u32; 4]>");
261+
}
262+
263+
#[test]
264+
fn demangle_many_dollars() {
265+
t!("_ZN13test$u20$test4foobE", "test test::foob");
266+
t!("_ZN12test$BP$test4foobE", "test*test::foob");
267+
}
268+
269+
270+
#[test]
271+
fn demangle_osx() {
272+
t!("__ZN5alloc9allocator6Layout9for_value17h02a996811f781011E", "alloc::allocator::Layout::for_value::h02a996811f781011");
273+
t!("__ZN38_$LT$core..option..Option$LT$T$GT$$GT$6unwrap18_MSG_FILE_LINE_COL17haf7cb8d5824ee659E", "<core::option::Option<T>>::unwrap::_MSG_FILE_LINE_COL::haf7cb8d5824ee659");
274+
t!("__ZN4core5slice89_$LT$impl$u20$core..iter..traits..IntoIterator$u20$for$u20$$RF$$u27$a$u20$$u5b$T$u5d$$GT$9into_iter17h450e234d27262170E", "core::slice::<impl core::iter::traits::IntoIterator for &'a [T]>::into_iter::h450e234d27262170");
275+
}
276+
277+
#[test]
278+
fn demangle_windows() {
279+
t!("ZN4testE", "test");
280+
t!("ZN13test$u20$test4foobE", "test test::foob");
281+
t!("ZN12test$RF$test4foobE", "test&test::foob");
282+
}
283+
284+
#[test]
285+
fn demangle_elements_beginning_with_underscore() {
286+
t!("_ZN13_$LT$test$GT$E", "<test>");
287+
t!("_ZN28_$u7b$$u7b$closure$u7d$$u7d$E", "{{closure}}");
288+
t!("_ZN15__STATIC_FMTSTRE", "__STATIC_FMTSTR");
289+
}
290+
291+
#[test]
292+
fn demangle_trait_impls() {
293+
t!("_ZN71_$LT$Test$u20$$u2b$$u20$$u27$static$u20$as$u20$foo..Bar$LT$Test$GT$$GT$3barE",
294+
"<Test + 'static as foo::Bar<Test>>::bar");
295+
}
296+
297+
#[test]
298+
fn demangle_without_hash() {
299+
let s = "_ZN3foo17h05af221e174051e9E";
300+
t!(s, "foo::h05af221e174051e9");
301+
t_nohash!(s, "foo");
302+
}
303+
304+
#[test]
305+
fn demangle_without_hash_edgecases() {
306+
// One element, no hash.
307+
t_nohash!("_ZN3fooE", "foo");
308+
// Two elements, no hash.
309+
t_nohash!("_ZN3foo3barE", "foo::bar");
310+
// Longer-than-normal hash.
311+
t_nohash!("_ZN3foo20h05af221e174051e9abcE", "foo");
312+
// Shorter-than-normal hash.
313+
t_nohash!("_ZN3foo5h05afE", "foo");
314+
// Valid hash, but not at the end.
315+
t_nohash!("_ZN17h05af221e174051e93fooE", "h05af221e174051e9::foo");
316+
// Not a valid hash, missing the 'h'.
317+
t_nohash!("_ZN3foo16ffaf221e174051e9E", "foo::ffaf221e174051e9");
318+
// Not a valid hash, has a non-hex-digit.
319+
t_nohash!("_ZN3foo17hg5af221e174051e9E", "foo::hg5af221e174051e9");
320+
}
321+
322+
#[test]
323+
fn demangle_thinlto() {
324+
// One element, no hash.
325+
t!("_ZN3fooE.llvm.9D1C9369", "foo");
326+
t!("_ZN3fooE.llvm.9D1C9369@@16", "foo");
327+
t_nohash!("_ZN9backtrace3foo17hbb467fcdaea5d79bE.llvm.A5310EB9", "backtrace::foo");
328+
}
329+
330+
#[test]
331+
fn demangle_llvm_ir_branch_labels() {
332+
t!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i", "core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut::haf9727c2edfbc47b.exit.i.i");
333+
t_nohash!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i", "core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i");
334+
}
335+
336+
#[test]
337+
fn demangle_ignores_suffix_that_doesnt_look_like_a_symbol() {
338+
t_err!("_ZN3fooE.llvm moocow");
339+
}
340+
341+
#[test]
342+
fn dont_panic() {
343+
::demangle("_ZN2222222222222222222222EE").to_string();
344+
::demangle("_ZN5*70527e27.ll34csaғE").to_string();
345+
::demangle("_ZN5*70527a54.ll34_$b.1E").to_string();
346+
::demangle("\
347+
_ZN5~saäb4e\n\
348+
2734cOsbE\n\
349+
5usage20h)3\0\0\0\0\0\0\07e2734cOsbE\
350+
").to_string();
351+
}
352+
353+
#[test]
354+
fn invalid_no_chop() {
355+
t_err!("_ZNfooE");
356+
}
357+
358+
#[test]
359+
fn handle_assoc_types() {
360+
t!("_ZN151_$LT$alloc..boxed..Box$LT$alloc..boxed..FnBox$LT$A$C$$u20$Output$u3d$R$GT$$u20$$u2b$$u20$$u27$a$GT$$u20$as$u20$core..ops..function..FnOnce$LT$A$GT$$GT$9call_once17h69e8f44b3723e1caE", "<alloc::boxed::Box<alloc::boxed::FnBox<A, Output=R> + 'a> as core::ops::function::FnOnce<A>>::call_once::h69e8f44b3723e1ca");
361+
}
362+
363+
#[test]
364+
fn handle_bang() {
365+
t!(
366+
"_ZN88_$LT$core..result..Result$LT$$u21$$C$$u20$E$GT$$u20$as$u20$std..process..Termination$GT$6report17hfc41d0da4a40b3e8E",
367+
"<core::result::Result<!, E> as std::process::Termination>::report::hfc41d0da4a40b3e8"
368+
);
369+
}
370+
}

0 commit comments

Comments
 (0)