3 changed files with 460 additions and 14 deletions
@ -0,0 +1,454 @@
|
||||
# This script renders a graph of the CircuitPython ROM image.
# It takes a single ELF file and uses objdump to read its contents.
||||
|
||||
import pygraphviz as pgv |
||||
import click |
||||
import sh |
||||
|
||||
# sh exposes programs as attributes; dashes in a program name are written as
# underscores, so this is the `arm-none-eabi-objdump` executable.
objdump = sh.arm_none_eabi_objdump
||||
|
||||
def parse_hex(h):
    """Return the integer value of the hexadecimal string *h*.

    *h* is normally written without a ``0x`` prefix (as objdump prints
    addresses), but a prefixed string is accepted as well.

    Raises:
        ValueError: if *h* is not valid hexadecimal.
    """
    return int(h, 16)
||||
|
||||
# Jump targets that are never recorded as outgoing jumps of a function.
BAD_JUMPS = ["UNPREDICTABLE", "_etext"]

# Fill colors for nodes that should stand out in the rendered graph,
# keyed by symbol name.
SPECIAL_NODE_COLORS = {
    "main": "pink",
    "exception_table": "green",
}
||||
|
||||
@click.command()
@click.argument("elf_filename")
def do_all_the_things(elf_filename):
    """Chart the symbols in ELF_FILENAME's .text section as callgraph.svg.

    The DWARF info (objdump --dwarf=info) is parsed first so that variables
    can be scanned for pointers. The .text disassembly (objdump -Dz -j .text)
    then supplies addresses, sizes, jumps and pointer words. Jumps and
    pointers become graph edges (pointer edges are red). Progress and
    debugging details are printed to stdout.
    """
    last_address = 0
    all_symbols = {}
    symbols_by_debug_address = {}
    symbols_by_memory_address = {}
    symbols_by_linkage_name = {}

    # Named DIE kinds that are not recorded in all_symbols: they don't end up
    # in memory, and their type info is available through
    # symbols_by_debug_address.
    not_in_memory = (
        "DW_TAG_member",
        "DW_TAG_label",
        "DW_TAG_typedef",
        "DW_TAG_enumerator",
        "DW_TAG_enumeration_type",
        "DW_TAG_base_type",
        "DW_TAG_structure_type",
        "DW_TAG_compile_unit",
        "DW_TAG_union_type",
    )

    # Gather type info so we know how to treat the disassembly
    debug_dump = objdump("--dwarf=info", elf_filename)
    debug_dump_lines = debug_dump.stdout.decode("utf-8").split("\n")
    symbol_stack = []  # enclosing entries of the one currently being read
    symbol = None  # entry whose attribute lines are currently being read
    ignore = False
    min_call_site_param = 0x20000000
    for line in debug_dump_lines:
        if not line:
            continue
        parts = line.split()
        # Slice rather than index so a one-character line cannot raise.
        if line[1:2] == "<":
            # A " <depth><offset>: ... (DW_TAG_xxx)" line starts a new entry.
            if parts[-1] == "0":
                # A trailing 0 closes the current nesting level.
                symbol = symbol_stack.pop()
                continue
            debug_type = parts[-1].strip("()")
            # skip info about function parameters
            ignore = debug_type == "DW_TAG_formal_parameter"
            depth = int(parts[0].split(">")[0].strip("<"))
            if len(symbol_stack) == (depth - 1) and depth > 0:
                # One level deeper: the previous entry becomes the parent.
                symbol_stack.append(symbol)
            elif symbol and "name" in symbol:
                # The previous entry is complete; file it.
                if symbol["debug_type"] == "DW_TAG_variable":
                    if "start_address" in symbol:
                        symbols_by_memory_address[symbol["start_address"]] = symbol
                elif symbol["debug_type"] not in not_in_memory:
                    all_symbols[symbol["name"]] = symbol
            elif (symbol
                  and symbol["debug_type"] == "DW_TAG_GNU_call_site_parameter"
                  and "call_site_value" in symbol):
                # Attribute the constant address passed at this call site to
                # the nearest enclosing function. If there is none, skip the
                # record instead of indexing past the start of the stack.
                parent = next(
                    (s for s in reversed(symbol_stack)
                     if s and s["debug_type"] == "DW_TAG_subprogram"),
                    None,
                )

                # Only point to ROM
                addr = symbol["call_site_value"]
                if parent is not None and 0x2000 <= addr < 0x20000000:
                    parent.setdefault("outgoing_pointers", set()).add(addr)
                    if addr not in symbols_by_memory_address:
                        symbols_by_memory_address[addr] = symbol
                        min_call_site_param = min(addr, min_call_site_param)
                        symbol["name"] = "name{:x}".format(addr)
            address = parse_hex(parts[0].split("<")[-1].strip(">:"))
            symbol = {"debug_address": address, "debug_type": debug_type, "other": []}
            if debug_type == "DW_TAG_structure_type":
                symbol["struct"] = {}  # member offset -> member entry
            elif debug_type == "DW_TAG_array_type":
                symbol["subtype"] = None
                symbol["bound_count"] = 0
                symbol["maxlen"] = 0
            elif debug_type == "DW_TAG_subrange_type":
                symbol_stack[-1]["subtype"] = symbol
            symbols_by_debug_address[address] = symbol
        elif ignore:
            continue
        elif line[:4] == "    ":
            # Attribute line of the current entry: "    <offset>   DW_AT_xxx : value"
            tag = parts[1].strip(":")
            if tag == "DW_AT_name":
                symbol["name"] = parts[-1]
            elif tag == "DW_AT_type":
                symbol["type"] = int(parts[-1].strip("<>"), 0)
                if symbol["debug_type"] == "DW_TAG_subrange_type":
                    enclosing = symbol_stack[-1]
                    if not enclosing["subtype"]:
                        enclosing["subtype"] = symbol
                    elif enclosing["subtype"]["type"] != symbol["type"]:
                        raise RuntimeError(
                            "subrange type differs from the one already recorded"
                        )
            elif tag == "DW_AT_upper_bound":
                # Skip arrays with length defined by other variables
                if parts[-1][0] != "<":
                    upper_bound = int(parts[-1])
                    enclosing = symbol_stack[-1]
                    # Each further bound multiplies the element count.
                    if enclosing["bound_count"] > 0:
                        enclosing["maxlen"] *= upper_bound + 1
                    else:
                        enclosing["maxlen"] = upper_bound + 1
                    enclosing["bound_count"] += 1
            elif tag == "DW_AT_byte_size":
                symbol["size"] = int(parts[-1])
            elif tag == "DW_AT_inline":
                symbol["inlined"] = True
            elif tag == "DW_AT_low_pc":
                addr = int(parts[-1], 0)
                symbols_by_memory_address[addr] = symbol
            elif tag == "DW_AT_location":
                if parts[-2] == "(DW_OP_addr:":
                    addr = parse_hex(parts[-1].strip(")"))
                    if addr > 0:
                        symbol["start_address"] = addr
            elif tag == "DW_AT_linkage_name":
                symbol["linkage_name"] = parts[-1]
                symbols_by_linkage_name[symbol["linkage_name"]] = symbol
            elif tag == "DW_AT_data_member_location":
                symbol_stack[-1]["struct"][int(parts[-1])] = symbol
            elif tag == "DW_AT_GNU_call_site_value":
                if parts[-2] == "(DW_OP_addr:":
                    symbol["call_site_value"] = parse_hex(parts[-1].strip(")"))
            else:
                # Keep unhandled attributes around for debugging.
                symbol["other"].append(line)

    # Values stored in a pointer map: how the word at an offset is treated.
    MEMORY_NONE = 0
    MEMORY_POINTER = 1
    MEMORY_PY_OBJECT = 2

    def get_size(t):
        """Return the byte size of type entry *t*, following "type" links."""
        if "size" in t:
            return t["size"]
        return get_size(symbols_by_debug_address[t["type"]])

    def get_pointer_map(t, depth=0):
        """Return {byte offset: MEMORY_*} for the pointer words in type *t*."""
        kind = t["debug_type"]
        if kind == "DW_TAG_pointer_type":
            return {0: MEMORY_POINTER}
        if kind in ("DW_TAG_const_type", "DW_TAG_typedef", "DW_TAG_member",
                    "DW_TAG_subrange_type", "DW_TAG_volatile_type"):
            if "name" in t and t["name"] == "mp_rom_obj_t":
                return {0: MEMORY_PY_OBJECT}
            # Wrapper entries: look through to the underlying type.
            return get_pointer_map(symbols_by_debug_address[t["type"]], depth + 1)
        if kind in ("DW_TAG_base_type", "DW_TAG_enumeration_type"):
            return {}
        if kind == "DW_TAG_union_type":
            # skip for now
            return {}
        if "struct" in t:
            combined_map = {}
            for offset, member in t["struct"].items():
                for suboffset, style in get_pointer_map(member).items():
                    combined_map[offset + suboffset] = style
            return combined_map
        if "subtype" in t:
            # Array: repeat the element's map once per element.
            subtype = symbols_by_debug_address[t["type"]]
            pmap = get_pointer_map(subtype, depth + 1)
            size = get_size(subtype)
            expanded_map = {}
            for i in range(t["maxlen"]):
                for offset, style in pmap.items():
                    expanded_map[size * i + offset] = style
            return expanded_map
        print("no recurse", t)
        return {}

    # Do a second pass to dereference the types
    for symbol in symbols_by_memory_address.values():
        if "type" not in symbol:
            continue
        if symbol["debug_type"] == "DW_TAG_variable":
            symbol["pointer_map"] = get_pointer_map(symbols_by_debug_address[symbol["type"]])
        # Build a readable type description by walking the "type" chain.
        type_string = []
        t = symbol["type"]
        while t is not None:
            t_symbol = symbols_by_debug_address[t]
            t = t_symbol.get("type", None)
            if "name" in t_symbol:
                type_string.append(t_symbol["name"])
            elif t_symbol["debug_type"] == "DW_TAG_array_type":
                type_string.append("[]")
            elif t_symbol["debug_type"] == "DW_TAG_pointer_type":
                type_string.append("*")
            elif t_symbol["debug_type"] == "DW_TAG_const_type":
                type_string.append("const")
            elif t_symbol["debug_type"] == "DW_TAG_volatile_type":
                type_string.append("volatile")
        type_string.reverse()
        symbol["type_string"] = " ".join(type_string)

    # Gather size and call info
    text_dump = objdump("-Dz", "-j", ".text", elf_filename)
    text_dump_lines = text_dump.stdout.decode("utf-8").split("\n")
    section = None
    symbol = None
    for line in text_dump_lines[4:]:
        if line.startswith("Disassembly of section"):
            section = line.split()[-1].strip(":")
        elif not line:
            # A blank line ends the current symbol. The size runs up to the
            # address of its last line, so that line's own length is not
            # counted.
            if symbol and "end_address" not in symbol:
                symbol["end_address"] = last_address
                symbol["size"] = last_address - symbol["start_address"]
            symbol = None
            continue
        elif line[0].isnumeric():
            # Symbol header: "<address> <name>:"
            symbol_address, symbol_name = line.split()
            symbol_address = parse_hex(symbol_address)
            symbol_name = symbol_name.strip("<>:")
            if symbol_name in symbols_by_linkage_name:
                symbol = symbols_by_linkage_name[symbol_name]
                if "name" in symbol:
                    non_linkage = symbol["name"]
                    if not non_linkage.startswith("__builtin"):
                        symbol_name = non_linkage
                all_symbols[symbol_name] = symbol
                if "name" not in symbol:
                    symbol["name"] = symbol_name
            elif symbol_address in symbols_by_memory_address:
                all_symbols[symbol_name] = symbols_by_memory_address[symbol_address]
                if "name" not in all_symbols[symbol_name]:
                    all_symbols[symbol_name]["name"] = symbol_name
            elif symbol_name not in all_symbols:
                if symbol_name == "nlr_push_tail_var":
                    # No debug info for this one; borrow a type from a known
                    # symbol so it is handled as a variable.
                    fake_type = all_symbols["mp_obj_get_type"]["type"]
                    symbol = {"debug_type": "DW_TAG_variable", "name": symbol_name, "type": fake_type}
                else:
                    print(line)
                    print(symbol_name, symbol_address)
                    symbol = {"debug_type": "DW_TAG_subprogram", "name": symbol_name}
                all_symbols[symbol_name] = symbol

            symbol = all_symbols[symbol_name]
            symbol["start_address"] = symbol_address
            symbols_by_memory_address[symbol_address] = symbol
            symbol["section"] = section

            if symbol["debug_type"] == "DW_TAG_subprogram":
                symbol["outgoing_jumps"] = set()
                symbol["incoming_jumps"] = set()
            elif symbol["debug_type"] == "DW_TAG_variable":
                symbol["outgoing_pointers"] = set()

        elif line[0] == " ":
            # Content line: "<address>: <hex word> <disassembly...>"
            parts = line.strip().split()
            last_address = parse_hex(parts[0].strip(":"))

            offset = last_address - symbol["start_address"]
            if "pointer_map" in symbol and offset in symbol["pointer_map"]:
                ref = parse_hex(parts[1])
                pointer_style = symbol["pointer_map"][offset]
                if pointer_style == MEMORY_POINTER:
                    # Clear the lowest bit before recording the target.
                    symbol["outgoing_pointers"].add(ref & 0xfffffffe)
                elif pointer_style == MEMORY_PY_OBJECT and ref & 0x3 == 0:
                    symbol["outgoing_pointers"].add(ref)
            if len(parts[1]) == 8 and parts[1][0] == "0":
                # A full 32-bit word: treat it as a pointer if it lands in ROM.
                addr = parse_hex(parts[1])
                if 0x2000 <= addr < 0x20000000:
                    symbol.setdefault("outgoing_pointers", set()).add(addr)
            elif "<" in line and symbol["debug_type"] == "DW_TAG_subprogram":
                if line[-1] == ">":
                    # Line ends with "<target+offset>": a jump or call.
                    jump_to = parts[-1].strip("<>").split("+")[0]
                    if "name" not in symbol:
                        print(jump_to)
                        print(symbol)
                    if jump_to != symbol["name"] and jump_to not in BAD_JUMPS:
                        symbol["outgoing_jumps"].add(jump_to)
                    if jump_to == "_etext":
                        print(line)
                elif "UNDEFINED" in line:
                    continue
                elif parts[2] == "ldr":
                    continue
                else:
                    print(line)

    print(hex(min_call_site_param))
    # .get: not every image defines exception_table.
    print(all_symbols.get("exception_table"))

    print("converting outgoing pointers to names")

    # Convert outgoing pointers to names from addresses. One symbol dict can
    # be registered under several names; convert each dict only once, or the
    # second visit would see names instead of addresses and discard them all.
    converted_ids = set()
    for symbol in all_symbols.values():
        if "outgoing_pointers" not in symbol or id(symbol) in converted_ids:
            continue
        converted_ids.add(id(symbol))
        converted = set()
        for outgoing in symbol["outgoing_pointers"]:
            if outgoing not in symbols_by_memory_address:
                continue
            target = symbols_by_memory_address[outgoing]
            if target["debug_type"] in ("DW_TAG_GNU_call_site", "DW_TAG_lexical_block"):
                continue
            target_name = target.get("name")
            if target_name is None:
                # Nothing to label the edge with.
                continue
            if target_name == "audioio_wavefile_type":
                print(target)
            converted.add(target_name)
        symbol["outgoing_pointers"] = converted

    print("linking back")
    # Link back: record each edge on its target as an incoming edge.
    edge_kinds = (
        ("outgoing_jumps", "incoming_jumps"),
        ("outgoing_pointers", "incoming_pointers"),
    )
    for symbol_name, symbol in all_symbols.items():
        for outgoing_key, incoming_key in edge_kinds:
            for outgoing in symbol.get(outgoing_key, ()):
                if outgoing not in all_symbols:
                    continue
                all_symbols[outgoing].setdefault(incoming_key, set()).add(symbol_name)

    print(all_symbols.get("exception_table"))

    # Chart it all
    print("charting {} symbols".format(len(all_symbols)))
    callgraph = pgv.AGraph(directed=True)
    for symbol_name, symbol in all_symbols.items():
        # Symbols nothing refers to are not given a node of their own.
        if not symbol.get("incoming_jumps") and not symbol.get("incoming_pointers"):
            continue
        if "start_address" not in symbol:
            continue
        callgraph.add_node(symbol_name)
        for outgoing in symbol.get("outgoing_jumps", ()):
            callgraph.add_edge(symbol_name, outgoing)
        for outgoing in symbol.get("outgoing_pointers", ()):
            callgraph.add_edge(symbol_name, outgoing, color="red")

    # Style all of the nodes
    print("styling")
    # Deleting a node while iternodes() is walking the graph invalidates the
    # iteration, so deletions are collected and applied afterwards.
    nodes_to_delete = []
    for node in callgraph.iternodes():
        if node.name not in all_symbols:
            continue
        symbol = all_symbols[node.name]
        node.attr["shape"] = "box"
        text_width_ish = len(node.name) * 0.1
        if "size" not in symbol:
            # Never seen in the disassembly; report it and leave it unsized.
            print(symbol)
            continue
        # Node area is proportional to the symbol size; widen the box when
        # the name would not fit in a square of that area.
        size = symbol["size"] / 8
        square_size = size ** 0.5
        if text_width_ish > square_size:
            w = text_width_ish
            h = size / text_width_ish
        else:
            w = square_size
            h = square_size
        node.attr["width"] = w
        node.attr["height"] = h
        node.attr["label"] = node.name + "\r\n" + str(symbol["size"]) + " bytes"
        node.attr["style"] = "filled"

        incoming = (len(symbol.get("incoming_jumps", ()))
                    + len(symbol.get("incoming_pointers", ())))

        if node.name in SPECIAL_NODE_COLORS:
            node.attr["color"] = SPECIAL_NODE_COLORS[node.name]
        elif incoming == 1:
            node.attr["color"] = "lightblue"
        elif incoming > 25:
            print("delete", node.name, "because it has {} incoming".format(incoming))
            nodes_to_delete.append(node.name)
        elif incoming > 15:
            node.attr["color"] = "red"

    for node_name in nodes_to_delete:
        callgraph.delete_node(node_name)

    print("drawing")
    callgraph.layout(prog="dot")
    fn = "callgraph.svg"
    print(fn)
    callgraph.draw(fn)
||||
|
||||
# Run the click command when executed as a script; click parses sys.argv.
if __name__ == "__main__":
    do_all_the_things()
Loading…
Reference in new issue