# demangle_tree.py (7.26 KB)
# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

# Parsed command-line arguments. Assigned in the `__main__` block below and
# read by go().
args = None

def parse_line(line):
    """Split one line of objdump output into mangled and demangled names.

    The mangled name is taken to start at the first '?' on the line and to run
    up to the first '(' after it; the demangled text is whatever lies between
    that '(' and the last ')' on the line.

    Args:
        line: A single line of text.

    Returns:
        A ``(mangled, demangled)`` tuple of whitespace-stripped strings, or
        ``(None, None)`` when the line has no '?', no '(' after the '?', or no
        ')' after that '('.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None

    # Search from the right so that parentheses belonging to the demangled
    # name itself (e.g. an argument list) stay inside the captured text.
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        # Unterminated parenthesis: without this check the slice below would
        # silently drop the last character of the line.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()

class Result(object):
    """Demangling statistics for a single file or an aggregate of several."""

    def __init__(self):
        # Path these statistics are labelled with (a file or a directory).
        self.file = None
        # Counters, both starting from zero.
        self.nfiles = self.nsymbols = 0
        # Mangled names recorded as demangling failures.
        self.errors = set()
        # Paths of files recorded as crashes.
        self.crashed = []

class MapContext(object):
    """Mutable bookkeeping carried across successive chunk-processing calls."""

    def __init__(self):
        # Queue of (directory, [paths]) tuples that still have files to dump.
        self.pending_objs = []
        # Number of paths currently queued across all of pending_objs.
        self.npending = 0
        # Running totals over every directory that has been fully processed.
        self.rcumulative = Result()
        # Partial Result of a directory whose files were split across chunks,
        # or None when no directory is half-finished.
        self.rincomplete = None

def process_file(path, objdump):
    """Run objdump on one object file and tally its demangling failures.

    Args:
        path: Path of the object file to dump.
        objdump: Path (or name) of the llvm-objdump executable to run.

    Returns:
        A Result whose `file` is `path`. If objdump exits with a non-zero
        status, `crashed` is `[path]` and nothing else is filled in.
        Otherwise `nsymbols` counts the output lines parse_line() recognised,
        and `errors` holds the mangled names whose demangled text contains
        "invalid mangled name".
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured and discarded; only stdout is inspected.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Symbol names are not guaranteed to be valid UTF-8. A strict decode would
    # raise UnicodeDecodeError here, and since this function runs inside
    # pool.map a single such file would abort the whole run. Substitute
    # undecodable bytes instead.
    output = stdout.decode('utf-8', 'replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r

def add_results(r1, r2):
    """Fold the statistics held by `r2` into `r1`, modifying `r1` in place."""
    # Scalar counters first.
    r1.nfiles += r2.nfiles
    r1.nsymbols += r2.nsymbols
    # Then the collections: union of failed names, concatenation of crashes.
    r1.errors.update(r2.errors)
    r1.crashed += r2.crashed

def print_result_row(directory, result):
    """Print a one-line summary of `result`, labelled with `directory`."""
    ncrashes = len(result.crashed)
    nerrors = len(result.errors)
    row = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, ncrashes, nerrors, result.nsymbols, directory)
    print(row)

def process_one_chunk(pool, chunk_size, objdump, context):
    """Dump up to `chunk_size` pending files and report finished directories.

    Files are taken from the front of `context.pending_objs`, in order, until
    `chunk_size` files have been gathered or nothing is pending. They are
    dumped through `pool.map`, and each file's Result is folded into the
    Result of the directory it was queued under. Every directory whose files
    have now all been processed is added to `context.rcumulative` and printed;
    a directory that was only partly consumed is parked in
    `context.rincomplete` until the next call.

    Args:
        pool: Object whose `map(func, iterable)` returns results in input
            order (a multiprocessing Pool in this script).
        chunk_size: Maximum number of files to process in this call.
        objdump: Path of the llvm-objdump executable, forwarded to
            process_file().
        context: MapContext holding the pending queue and running totals;
            mutated in place.
    """
    objs = []
    # obj_dirs[i] is the directory objs[i] was queued under. Tracking this by
    # position avoids re-deriving it with os.path.dirname(), which does not
    # round-trip for every root (a file 'f.o' queued under '.' has dirname '').
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)

        # Resume the partial Result left by the previous chunk, if any; it
        # always belongs to the directory at the front of the queue.
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None
        else:
            dir_result = Result()

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            # Leave the untaken files at the front of the queue for next time.
            context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    worker = functools.partial(process_file, objdump=objdump)
    mapped_results = pool.map(worker, objs)

    for result_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[result_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs.pop()]

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for completed_dir in ordered_dirs:
        dir_result = dir_results[completed_dir]
        add_results(context.rcumulative, dir_result)
        print_result_row(completed_dir, dir_result)

def process_pending_files(pool, chunk_size, objdump, context):
    """Process full chunks until fewer than `chunk_size` files remain pending."""
    while True:
        if context.npending < chunk_size:
            # Not enough left for a full chunk; leave the rest queued.
            return
        process_one_chunk(pool, chunk_size, objdump, context)

def _fraction(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / float(denominator)


def go():
    """Walk the tree named by the module-level `args` and print a report.

    Reads `args.dir`, `args.extensions` and `args.objdump`. Every file under
    `args.dir` whose extension is in the requested list is dumped through a
    multiprocessing pool; a status row is printed per directory, followed by
    the lists of failed symbols and crashed files and a summary.

    Exceptions raised while processing are printed with their traceback rather
    than propagated, and the pool is always closed and joined before returning.
    """
    obj_dir = args.dir
    # Normalise each requested extension to the '.ext' form os.path.splitext()
    # produces. Empty entries (e.g. from a trailing comma) are skipped.
    extensions = [x if x.startswith('.') else '.' + x
                  for x in args.extensions.split(',') if x]
    # The --objdump help text promises a `which llvm-objdump` style lookup
    # when the option is omitted; passing the bare name lets subprocess
    # resolve it on PATH instead of handing None to Popen.
    objdump = args.objdump or 'llvm-objdump'

    # Used both as the number of worker processes and as the chunk size.
    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, objdump, context)

        # Flush whatever is left over as one final, possibly short, chunk.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # An empty tree (or one with no symbols) must still produce a summary
        # rather than a ZeroDivisionError.
        spct = _fraction(nsuccess, total.nsymbols)
        fpct = _fraction(nfailed, total.nsymbols)
        cpct = _fraction(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
    except Exception:
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()

if __name__ == "__main__":
    # freeze_support() has to be the first thing executed under the __main__
    # guard: in a frozen Windows executable the worker processes re-enter this
    # block with multiprocessing's own command line, which argparse below
    # would reject. It has no effect in any other situation.
    multiprocessing.freeze_support()

    # Default object-file extension for the host platform.
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    # The bare executable name is resolved on PATH when the process is
    # launched, which is the lookup the help text describes.
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    # go() reads its configuration from this module-level name.
    args = parser.parse_args()

    go()