|
| 1 | +#!/usr/bin/env python |
| 2 | +# blockifyasm ----- Split disassembly into basic blocks ---------*- python -*- |
| 3 | +# |
| 4 | +# This source file is part of the Swift.org open source project |
| 5 | +# |
| 6 | +# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| 7 | +# Licensed under Apache License v2.0 with Runtime Library Exception |
| 8 | +# |
| 9 | +# See https://swift.org/LICENSE.txt for license information |
| 10 | +# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| 11 | +# |
| 12 | +# ---------------------------------------------------------------------------- |
| 13 | +# |
| 14 | +# Splits a disassembled function from lldb into basic blocks. |
| 15 | +# |
| 16 | +# Useful to show the control flow graph of a disassembled function. |
| 17 | +# The control flow graph can then be viewed with the viewcfg utility: |
| 18 | +# |
| 19 | +# (lldb) disassemble |
| 20 | +# <copy-paste output to file.s> |
| 21 | +# $ blockifyasm < file.s | viewcfg |
| 22 | +# |
| 23 | +# ---------------------------------------------------------------------------- |
| 24 | + |
| 25 | +from __future__ import print_function |
| 26 | + |
| 27 | +import re |
| 28 | +import sys |
| 29 | +from collections import defaultdict |
| 30 | + |
| 31 | + |
def help():
    """Print the command-line usage summary to standard output."""
    # The final empty entry reproduces the newline that ends the usage text;
    # print() then adds one more, leaving a blank line after the message.
    usage_lines = (
        "Usage:",
        "",
        "blockifyasm [-<n>] < file",
        "",
        "-<n>: only match <n> significant digits of relative branch addresses",
        "",
    )
    print("\n".join(usage_lines))
| 40 | + |
| 41 | + |
# Line patterns for the disassembly text read from stdin.  All of them expect
# the line to start with whitespace, optionally followed by a '->' marker,
# and then the instruction address as '0x<hex>'.
#
# _BRANCH_RE1: an instruction whose last ' 0x<hex>' operand is followed by a
#              '; <+N>' or '; <-N>' annotation; group 1 is the target address.
# _BRANCH_RE2: an instruction whose mnemonic starts with 'tb' and which has a
#              ' 0x<hex>' operand (presumably arm64 tbz/tbnz -- TODO confirm);
#              group 1 is the target address.
# _INST_RE:    any instruction line; group 1 is the instruction address and
#              group 2 the mnemonic.
_BRANCH_RE1 = re.compile(r'^\s[-\s>]*0x.*:\s.* 0x([0-9a-f]+)\s*;\s*<[+-]')
_BRANCH_RE2 = re.compile(r'^\s[-\s>]*0x.*:\s+tb.* 0x([0-9a-f]+)\s*(;.*)?')
_INST_RE = re.compile(r'^\s[-\s>]*0x([0-9a-f]+)[\s<>0-9+-]*:\s+([a-z0-9.]+)\s')

# Mnemonics after which control never continues with the next instruction.
_NON_FALL_THROUGH_INSTS = frozenset(['b', 'ret', 'brk', 'jmp', 'retq', 'ud2'])


def _branch_target(line, addr_len):
    """Return the branch target address of `line`, or None.

    The address is returned as a hex string without the '0x' prefix, cut
    down to its last `addr_len` digits.  None is returned if the line matches
    neither branch pattern.
    """
    for pattern in (_BRANCH_RE1, _BRANCH_RE2):
        bm = pattern.match(line)
        if bm:
            return bm.group(1)[-addr_len:]
    return None


def _print_function(lines, block_starts, addr_len):
    """Print one function's instructions, split into labelled basic blocks.

    lines:        the instruction lines of the function (each must match
                  _INST_RE); nothing is printed if the list is empty.
    block_starts: dict keyed by truncated address.  On entry it holds -1 for
                  every known branch target; this function overwrites the
                  entries of addresses that start a block with the block
                  number (the dict is modified in place).
    addr_len:     number of trailing hex digits of an address to compare.
    """
    if not lines:
        return

    predecessors = defaultdict(list)
    block_num = -1
    next_is_block = True
    prev_is_fallthrough = False

    # Pass 1: number the blocks and collect the predecessors of each block.
    for line in lines:
        m = _INST_RE.match(line)
        assert m, "non instruction line in function"
        addr = m.group(1)[-addr_len:]
        inst = m.group(2)

        # A block starts at the first instruction, after every branch, and
        # at every address that is the target of a branch.
        if next_is_block or addr in block_starts:
            if prev_is_fallthrough:
                predecessors[addr].append(block_num)
            block_num += 1
            block_starts[addr] = block_num
            next_is_block = False

        br_addr = _branch_target(line, addr_len)
        if br_addr:
            next_is_block = True
            predecessors[br_addr].append(block_num)

        prev_is_fallthrough = inst not in _NON_FALL_THROUGH_INSTS

    # Pass 2: print the function with basic block labels.
    print('{')
    for line in lines:
        m = _INST_RE.match(line)
        if m:
            addr = m.group(1)[-addr_len:]
            if addr in block_starts:
                label = 'bb' + str(block_starts[addr]) + ':'
                preds = predecessors[addr]
                if preds:
                    print(label.ljust(55) + '; preds = ', end='')
                    print(', '.join('bb' + str(pred) for pred in preds))
                else:
                    print(label)

            # Replace the '; <+N>' annotation with the target's block label.
            # Targets that are unknown or lie outside of the function (still
            # -1, or missing) keep their original annotation.
            br_addr = _branch_target(line, addr_len)
            if br_addr and block_starts.get(br_addr, -1) >= 0:
                line = re.sub(r';\s<[+-].*',
                              '; bb' + str(block_starts[br_addr]), line)

        print(line, end='')
    print('}')


def main():
    """Read disassembly from stdin and print it split into basic blocks.

    An optional first argument of the form -<n> limits address comparison to
    the last <n> hex digits; any other first argument prints the usage text
    and returns.  Consecutive instruction lines are collected as one function
    and printed between '{' and '}' with 'bbN:' labels; every other input
    line is echoed unchanged.
    """
    addr_len = 16
    if len(sys.argv) >= 2:
        m = re.match(r'^-([0-9]+)$', sys.argv[1])
        if m:
            addr_len = int(m.group(1))
        else:
            help()
            return

    lines = []
    block_starts = {}

    # Read disassembly code from stdin
    for line in sys.stdin:
        # let the line with the instruction pointer begin with a space
        line = re.sub('^-> ', ' ->', line)

        if _INST_RE.match(line):
            lines.append(line)
            br_addr = _branch_target(line, addr_len)
            if br_addr:
                if len(br_addr) < addr_len:
                    # A shorter target address narrows the comparison width.
                    # Re-key the targets recorded so far, otherwise they
                    # would no longer match the truncated addresses used
                    # when the function is printed.
                    addr_len = len(br_addr)
                    block_starts = dict.fromkeys(
                        (key[-addr_len:] for key in block_starts), -1)
                block_starts[br_addr] = -1
        else:
            # A non-instruction line ends the current function (if any).
            _print_function(lines, block_starts, addr_len)
            lines = []
            block_starts = {}
            print(line, end='')

    _print_function(lines, block_starts, addr_len)
| 140 | + |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments