Skip to content

Commit 8d2c735

Browse files
authored
Merge pull request #16 from shepmaster/dot-delimited-demangling
Support demangling symbols with dot-delimited words at the end
2 parents 6c7afa8 + fcf9f7f commit 8d2c735

File tree

1 file changed

+63
-8
lines changed

1 file changed

+63
-8
lines changed

src/lib.rs

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,22 @@ use core::fmt;
3636
pub struct Demangle<'a> {
3737
/// The symbol text as held by `demangle` when this value is built, i.e. after
/// any trailing suffix has been split off. Presumably what is printed when
/// `valid` is false -- TODO confirm against the `Display` impl.
original: &'a str,
3838
/// The portion of the symbol that gets demangled; `demangle` starts it out
/// as the input string. NOTE(review): presumably the mangling prefix and
/// terminator are stripped before it is stored -- confirm in `demangle`.
inner: &'a str,
39+
/// Trailing period-delimited words (such as `.exit.i.i`) that followed the
/// final `E` of the mangled name; empty when none were split off. Written
/// verbatim after the demangled name when formatting.
suffix: &'a str,
3940
/// Whether the input passed validation as a mangled Rust symbol in
/// `demangle`.
valid: bool,
4041
/// The number of ::-separated elements in the original name.
4142
elements: usize,
4243
}
4344

4445
/// De-mangles a Rust symbol into a more readable version
4546
///
46-
/// All rust symbols by default are mangled as they contain characters that
47+
/// All Rust symbols by default are mangled as they contain characters that
4748
/// cannot be represented in all object files. The mangling mechanism is similar
4849
/// to C++'s, but Rust has a few specifics to handle items like lifetimes in
4950
/// symbols.
5051
///
51-
/// This function will take a **mangled** symbol (typically acquired from a
52-
/// `Symbol` which is in turn resolved from a `Frame`) and then writes the
53-
/// de-mangled version into the given `writer`. If the symbol does not look like
54-
/// a mangled symbol, it is still written to `writer`.
52+
/// This function takes a **mangled** symbol and returns a `Demangle` value. When printed,
53+
/// the de-mangled version will be written. If the symbol does not look like
54+
/// a mangled symbol, the original value will be written instead.
5555
///
5656
/// # Examples
5757
///
@@ -63,7 +63,7 @@ pub struct Demangle<'a> {
6363
/// assert_eq!(demangle("foo").to_string(), "foo");
6464
/// ```
6565
66-
// All rust symbols are in theory lists of "::"-separated identifiers. Some
66+
// All Rust symbols are in theory lists of "::"-separated identifiers. Some
6767
// assemblers, however, can't handle these characters in symbol names. To get
6868
// around this, we use C++-style mangling. The mangling method is:
6969
//
@@ -82,7 +82,7 @@ pub struct Demangle<'a> {
8282
// etc. Additionally, this doesn't handle glue symbols at all.
8383
pub fn demangle(mut s: &str) -> Demangle {
8484
// During ThinLTO LLVM may import and rename internal symbols, so strip out
85-
// those endings first as they're on of the last manglings applied to symbol
85+
// those endings first as they're one of the last manglings applied to symbol
8686
// names.
8787
let llvm = ".llvm.";
8888
if let Some(i) = s.find(llvm) {
@@ -99,8 +99,20 @@ pub fn demangle(mut s: &str) -> Demangle {
9999
}
100100
}
101101

102+
// Output like LLVM IR adds extra period-delimited words. See if
103+
// we are in that case and save the trailing words if so.
104+
let mut suffix = "";
105+
if let Some(i) = s.rfind("E.") {
106+
let (head, tail) = s.split_at(i + 1); // After the E, before the period
107+
108+
if is_symbol_like(tail) {
109+
s = head;
110+
suffix = tail;
111+
}
112+
}
113+
102114
// First validate the symbol. If it doesn't look like anything we're
103-
// expecting, we just print it literally. Note that we must handle non-rust
115+
// expecting, we just print it literally. Note that we must handle non-Rust
104116
// symbols because we could have any function in the backtrace.
105117
let mut valid = true;
106118
let mut inner = s;
@@ -156,6 +168,7 @@ pub fn demangle(mut s: &str) -> Demangle {
156168

157169
Demangle {
158170
inner: inner,
171+
suffix: suffix,
159172
valid: valid,
160173
elements: elements,
161174
original: s,
@@ -203,6 +216,35 @@ fn is_rust_hash(s: &str) -> bool {
203216
s.starts_with('h') && s[1..].chars().all(|c| c.is_digit(16))
204217
}
205218

219+
fn is_symbol_like(s: &str) -> bool {
220+
s.chars().all(|c| {
221+
// Once `char::is_ascii_punctuation` and `char::is_ascii_alphanumeric`
222+
// have been stable for long enough, use those instead for clarity
223+
is_ascii_alphanumeric(c) || is_ascii_punctuation(c)
224+
})
225+
}
226+
227+
// Copied from the documentation of `char::is_ascii_alphanumeric`
228+
/// Reports whether `c` is an ASCII letter (`A`-`Z`, `a`-`z`) or an ASCII
/// digit (`0`-`9`).
///
/// Hand-written stand-in for `char::is_ascii_alphanumeric`, kept so the code
/// does not depend on that method being available.
fn is_ascii_alphanumeric(c: char) -> bool {
    match c {
        // Inclusive range patterns are spelled `..=`; the older `...` form is
        // deprecated and is rejected outright from the 2021 edition onwards.
        '\u{0041}'..='\u{005A}'       // A-Z
        | '\u{0061}'..='\u{007A}'     // a-z
        | '\u{0030}'..='\u{0039}' => true, // 0-9
        _ => false,
    }
}
236+
237+
// Copied from the documentation of `char::is_ascii_punctuation`
238+
/// Reports whether `c` is an ASCII punctuation character, i.e. any printable
/// ASCII character that is neither alphanumeric nor the space character.
///
/// Hand-written stand-in for `char::is_ascii_punctuation`, kept so the code
/// does not depend on that method being available.
fn is_ascii_punctuation(c: char) -> bool {
    match c {
        // Inclusive range patterns are spelled `..=`; the older `...` form is
        // deprecated and is rejected outright from the 2021 edition onwards.
        '\u{0021}'..='\u{002F}'       // ! " # $ % & ' ( ) * + , - . /
        | '\u{003A}'..='\u{0040}'     // : ; < = > ? @
        | '\u{005B}'..='\u{0060}'     // [ \ ] ^ _ `
        | '\u{007B}'..='\u{007E}' => true, // { | } ~
        _ => false,
    }
}
247+
206248
impl<'a> fmt::Display for Demangle<'a> {
207249
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
208250
// Alright, let's do this.
@@ -289,6 +331,8 @@ impl<'a> fmt::Display for Demangle<'a> {
289331
}
290332
}
291333

334+
try!(f.write_str(self.suffix));
335+
292336
Ok(())
293337
}
294338
}
@@ -399,6 +443,17 @@ mod tests {
399443
t_nohash!("_ZN9backtrace3foo17hbb467fcdaea5d79bE.llvm.A5310EB9", "backtrace::foo");
400444
}
401445

446+
// Symbols taken from LLVM IR branch labels carry extra period-delimited words
// (here `.exit.i.i`) after the closing `E`. Both expectations below keep those
// words at the end of the demangled output.
#[test]
447+
fn demangle_llvm_ir_branch_labels() {
448+
// Expected output keeps the trailing `::h<hash>` element before the suffix.
t!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i", "core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut::haf9727c2edfbc47b.exit.i.i");
449+
// Expected output omits the hash element, but the suffix is still appended.
t_nohash!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i", "core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i");
450+
}
451+
452+
// The text after `E` contains a space, which is neither ASCII alphanumeric nor
// ASCII punctuation, so it must not be treated as a suffix.
#[test]
453+
fn demangle_ignores_suffix_that_doesnt_look_like_a_symbol() {
454+
// The input is expected to come back unchanged.
t!("_ZN3fooE.llvm moocow", "_ZN3fooE.llvm moocow");
455+
}
456+
402457
#[test]
403458
fn dont_panic() {
404459
super::demangle("_ZN2222222222222222222222EE").to_string();

0 commit comments

Comments
 (0)