exploded-graph-rewriter.py 39.4 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045
#!/usr/bin/env python
#
#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===-----------------------------------------------------------------------===#


from __future__ import print_function

import argparse
import collections
import difflib
import json
import logging
import os
import re
import sys


#===-----------------------------------------------------------------------===#
# These data structures represent a deserialized ExplodedGraph.
#===-----------------------------------------------------------------------===#


# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
    return (removed, added)


# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap:
    def __init__(self, items):
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        return diff_dicts(self.generic_map, prev.generic_map)

    def is_different(self, prev):
        removed, added = self.diff(prev)
        return len(removed) != 0 or len(added) != 0


# A deserialized source location.
class SourceLocation:
    def __init__(self, json_loc):
        logging.debug('json: %s' % json_loc)
        self.line = json_loc['line']
        self.col = json_loc['column']
        self.filename = os.path.basename(json_loc['file']) \
            if 'file' in json_loc else '(main file)'
        self.spelling = SourceLocation(json_loc['spelling']) \
            if 'spelling' in json_loc else None

    def is_macro(self):
        return self.spelling is not None


# A deserialized program point.
class ProgramPoint:
    def __init__(self, json_pp):
        self.kind = json_pp['kind']
        self.tag = json_pp['tag']
        self.node_id = json_pp['node_id']
        self.is_sink = bool(json_pp['is_sink'])
        self.has_report = bool(json_pp['has_report'])
        if self.kind == 'Edge':
            self.src_id = json_pp['src_id']
            self.dst_id = json_pp['dst_id']
        elif self.kind == 'Statement':
            logging.debug(json_pp)
            self.stmt_kind = json_pp['stmt_kind']
            self.cast_kind = json_pp['cast_kind'] \
                if 'cast_kind' in json_pp else None
            self.stmt_point_kind = json_pp['stmt_point_kind']
            self.stmt_id = json_pp['stmt_id']
            self.pointer = json_pp['pointer']
            self.pretty = json_pp['pretty']
            self.loc = SourceLocation(json_pp['location']) \
                if json_pp['location'] is not None else None
        elif self.kind == 'BlockEntrance':
            self.block_id = json_pp['block_id']


# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey:
    def __init__(self, json_ek):
        # CXXCtorInitializer is not a Stmt!
        self.stmt_id = json_ek['stmt_id'] if 'stmt_id' in json_ek \
            else json_ek['init_id']
        self.pretty = json_ek['pretty']
        self.kind = json_ek['kind'] if 'kind' in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


# Deserialized description of a location context.
class LocationContext:
    def __init__(self, json_frame):
        self.lctx_id = json_frame['lctx_id']
        self.caption = json_frame['location_context']
        self.decl = json_frame['calling']
        self.loc = SourceLocation(json_frame['location']) \
            if json_frame['location'] is not None else None

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


# A group of deserialized Environment bindings that correspond to a specific
# location context.
class EnvironmentFrame:
    def __init__(self, json_frame):
        self.location_context = LocationContext(json_frame)
        self.bindings = collections.OrderedDict(
            [(EnvironmentBindingKey(b),
              b['value']) for b in json_frame['items']]
            if json_frame['items'] is not None else [])

    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return len(removed) != 0 or len(added) != 0


# A deserialized Environment. This class can also hold other entities that
# are similar to Environment, such as Objects Under Construction.
class GenericEnvironment:
    def __init__(self, json_e):
        self.frames = [EnvironmentFrame(f) for f in json_e]

    def diff_frames(self, prev):
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for i in range(len(self.frames)):
            f = self.frames[i]
            prev_f = prev.frames[i]
            if f.location_context == prev_f.location_context:
                if f.is_different(prev_f):
                    updated.append(i)
            else:
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        updated = self.diff_frames(prev)
        return updated is None or len(updated) > 0


# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey:
    def __init__(self, json_sk):
        self.kind = json_sk['kind']
        self.offset = json_sk['offset']

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())


# A single cluster of the deserialized RegionStore.
class StoreCluster:
    def __init__(self, json_sc):
        self.base_region = json_sc['cluster']
        self.bindings = collections.OrderedDict(
            [(StoreBindingKey(b), b['value']) for b in json_sc['items']])

    def diff_bindings(self, prev):
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        removed, added = self.diff_bindings(prev)
        return len(removed) != 0 or len(added) != 0


# A deserialized RegionStore.
class Store:
    def __init__(self, json_s):
        self.ptr = json_s['pointer']
        self.clusters = collections.OrderedDict(
            [(c['pointer'], StoreCluster(c)) for c in json_s['items']])

    def diff_clusters(self, prev):
        removed = [k for k in prev.clusters if k not in self.clusters]
        added = [k for k in self.clusters if k not in prev.clusters]
        updated = [k for k in prev.clusters if k in self.clusters
                   and prev.clusters[k].is_different(self.clusters[k])]
        return (removed, added, updated)

    def is_different(self, prev):
        removed, added, updated = self.diff_clusters(prev)
        return len(removed) != 0 or len(added) != 0 or len(updated) != 0


# Deserialized messages from a single checker in a single program state.
# Basically a list of raw strings.
class CheckerLines:
    def __init__(self, json_lines):
        self.lines = json_lines

    def diff_lines(self, prev):
        lines = difflib.ndiff(prev.lines, self.lines)
        return [l.strip() for l in lines
                if l.startswith('+') or l.startswith('-')]

    def is_different(self, prev):
        return len(self.diff_lines(prev)) > 0


# Deserialized messages of all checkers, separated by checker.
class CheckerMessages:
    def __init__(self, json_m):
        self.items = collections.OrderedDict(
            [(m['checker'], CheckerLines(m['messages'])) for m in json_m])

    def diff_messages(self, prev):
        removed = [k for k in prev.items if k not in self.items]
        added = [k for k in self.items if k not in prev.items]
        updated = [k for k in prev.items if k in self.items
                   and prev.items[k].is_different(self.items[k])]
        return (removed, added, updated)

    def is_different(self, prev):
        removed, added, updated = self.diff_messages(prev)
        return len(removed) != 0 or len(added) != 0 or len(updated) != 0


# A deserialized program state.
class ProgramState:
    def __init__(self, state_id, json_ps):
        logging.debug('Adding ProgramState ' + str(state_id))

        if json_ps is None:
            json_ps = {
                'store': None,
                'environment': None,
                'constraints': None,
                'dynamic_types': None,
                'constructing_objects': None,
                'checker_messages': None
            }

        self.state_id = state_id

        self.store = Store(json_ps['store']) \
            if json_ps['store'] is not None else None

        self.environment = \
            GenericEnvironment(json_ps['environment']['items']) \
            if json_ps['environment'] is not None else None

        self.constraints = GenericMap([
            (c['symbol'], c['range']) for c in json_ps['constraints']
        ]) if json_ps['constraints'] is not None else None

        self.dynamic_types = GenericMap([
                (t['region'], '%s%s' % (t['dyn_type'],
                                        ' (or a sub-class)'
                                        if t['sub_classable'] else ''))
                for t in json_ps['dynamic_types']]) \
            if json_ps['dynamic_types'] is not None else None

        self.constructing_objects = \
            GenericEnvironment(json_ps['constructing_objects']) \
            if json_ps['constructing_objects'] is not None else None

        self.checker_messages = CheckerMessages(json_ps['checker_messages']) \
            if json_ps['checker_messages'] is not None else None


# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and in this moment we already need a room for predecessors and successors.
class ExplodedNode:
    def __init__(self):
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        logging.debug('Adding ' + node_id)
        self.ptr = node_id[4:]
        self.points = [ProgramPoint(p) for p in json_node['program_points']]
        self.node_id = self.points[-1].node_id
        self.state = ProgramState(json_node['state_id'],
                                  json_node['program_state']
            if json_node['program_state'] is not None else None);

        assert self.node_name() == node_id

    def node_name(self):
        return 'Node' + self.ptr


# A deserialized ExplodedGraph. Constructed by consuming a .dot file
# line-by-line.
class ExplodedGraph:
    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
    edge_re = re.compile(
        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')

    def __init__(self):
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ''

    def add_raw_line(self, raw_line):
        if raw_line.startswith('//'):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if len(raw_line) > 0 and raw_line[-1] != ';':
            self.incomplete_line += raw_line
            return
        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ''

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug('Line: ' + raw_line)
        result = self.edge_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as edge line.')
            pred = result.group(1)
            succ = result.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return
        result = self.node_re.match(raw_line)
        if result is not None:
            logging.debug('Classified as node line.')
            node_id = result.group(1)
            if len(self.nodes) == 0:
                self.root_id = node_id
            # Note: when writing tests you don't need to escape everything,
            # even though in a valid dot file everything is escaped.
            node_label = result.group(2).replace(' ', '') \
                                        .replace('\\"', '"') \
                                        .replace('\\{', '{') \
                                        .replace('\\}', '}') \
                                        .replace('\\\\', '\\') \
                                        .replace('\\|', '|') \
                                        .replace('\\<', '\\\\<') \
                                        .replace('\\>', '\\\\>') \
                                        .rstrip(',')
            # Handle `\l` separately because a string literal can be in code
            # like "string\\literal" with the `\l` inside.
            # Also on Windows macros __FILE__ produces specific delimiters `\`
            # and a directory or file may starts with the letter `l`.
            # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
            # because the literal as a rule containes multiple `\` before `\l`.
            node_label = re.sub(r'(?<!\\)\\l', '', node_label)
            logging.debug(node_label)
            json_node = json.loads(node_label)
            self.nodes[node_id].construct(node_id, json_node)
            return
        logging.debug('Skipping.')


#===-----------------------------------------------------------------------===#
# Visitors traverse a deserialized ExplodedGraph and do different things
# with every node and edge.
#===-----------------------------------------------------------------------===#


# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighing.
class DotDumpVisitor:
    def __init__(self, do_diffs, dark_mode, gray_mode,
                 topo_mode, dump_dot_only):
        self._do_diffs = do_diffs
        self._dark_mode = dark_mode
        self._gray_mode = gray_mode
        self._topo_mode = topo_mode
        self._dump_dot_only = dump_dot_only
        self._output = []

    def _dump_raw(self, s):
        if self._dump_dot_only:
            print(s, end='')
        else:
            self._output.append(s)

    def output(self):
        assert not self._dump_dot_only
        if sys.version_info[0] > 2 and sys.version_info[1] >= 5:
            return ''.join(self._output).encode()
        else:
            return ''.join(self._output)

    def _dump(self, s):
        s = s.replace('&', '&amp;') \
             .replace('{', '\\{') \
             .replace('}', '\\}') \
             .replace('\\<', '&lt;') \
             .replace('\\>', '&gt;') \
             .replace('|', '\\|')
        s = re.sub(r'(?<!\\)\\l', '<br />', s)
        if self._gray_mode:
            s = re.sub(r'<font color="[a-z0-9]*">', '', s)
            s = re.sub(r'</font>', '', s)
        self._dump_raw(s)

    @staticmethod
    def _diff_plus_minus(is_added):
        if is_added is None:
            return ''
        if is_added:
            return '<font color="forestgreen">+</font>'
        return '<font color="red">-</font>'

    @staticmethod
    def _short_pretty(s):
        if s is None:
            return None
        if len(s) < 20:
            return s
        left = s.find('{')
        right = s.rfind('}')
        if left == -1 or right == -1 or left >= right:
            return s
        candidate = s[0:left + 1] + ' ... ' + s[right:]
        if len(candidate) >= len(s):
            return s
        return candidate

    @staticmethod
    def _make_sloc(loc):
        if loc is None:
            return '<i>Invalid Source Location</i>'

        def make_plain_loc(loc):
            return '%s:<b>%s</b>:<b>%s</b>' \
                % (loc.filename, loc.line, loc.col)

        if loc.is_macro():
            return '%s <font color="royalblue1">' \
                   '(<i>spelling at </i> %s)</font>' \
                % (make_plain_loc(loc), make_plain_loc(loc.spelling))

        return make_plain_loc(loc)

    def visit_begin_graph(self, graph):
        self._graph = graph
        self._dump_raw('digraph "ExplodedGraph" {\n')
        if self._dark_mode:
            self._dump_raw('bgcolor="gray10";\n')
        self._dump_raw('label="";\n')

    def visit_program_point(self, p):
        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
            color = 'gold3'
        elif p.kind in ['PreStmtPurgeDeadSymbols',
                        'PostStmtPurgeDeadSymbols']:
            color = 'red'
        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
            color = 'dodgerblue' if self._dark_mode else 'blue'
        elif p.kind in ['Statement']:
            color = 'cyan4'
        else:
            color = 'forestgreen'

        self._dump('<tr><td align="left">%s.</td>' % p.node_id)

        if p.kind == 'Statement':
            # This avoids pretty-printing huge statements such as CompoundStmt.
            # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
            skip_pretty = 'PurgeDeadSymbols' in p.stmt_point_kind
            stmt_color = 'cyan3'
            self._dump('<td align="left" width="0">%s:</td>'
                       '<td align="left" width="0"><font color="%s">'
                       '%s</font> </td>'
                       '<td align="left"><i>S%s</i></td>'
                       '<td align="left"><font color="%s">%s</font></td>'
                       '<td align="left">%s</td></tr>'
                       % (self._make_sloc(p.loc), color,
                          '%s (%s)' % (p.stmt_kind, p.cast_kind)
                          if p.cast_kind is not None else p.stmt_kind,
                          p.stmt_id, stmt_color, p.stmt_point_kind,
                          self._short_pretty(p.pretty)
                          if not skip_pretty else ''))
        elif p.kind == 'Edge':
            self._dump('<td width="0"></td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td><td align="left">'
                       '[B%d] -\\> [B%d]</td></tr>'
                       % (color, 'BlockEdge', p.src_id, p.dst_id))
        elif p.kind == 'BlockEntrance':
            self._dump('<td width="0"></td>'
                       '<td align="left" width="0">'
                       '<font color="%s">%s</font></td>'
                       '<td align="left">[B%d]</td></tr>'
                       % (color, p.kind, p.block_id))
        else:
            # TODO: Print more stuff for other kinds of points.
            self._dump('<td width="0"></td>'
                       '<td align="left" width="0" colspan="2">'
                       '<font color="%s">%s</font></td></tr>'
                       % (color, p.kind))

        if p.tag is not None:
            self._dump('<tr><td width="0"></td><td width="0"></td>'
                       '<td colspan="3" align="left">'
                       '<b>Tag: </b> <font color="crimson">'
                       '%s</font></td></tr>' % p.tag)

        if p.has_report:
            self._dump('<tr><td width="0"></td><td width="0"></td>'
                       '<td colspan="3" align="left">'
                       '<font color="red"><b>Bug Report Attached'
                       '</b></font></td></tr>')
        if p.is_sink:
            self._dump('<tr><td width="0"></td><td width="0"></td>'
                       '<td colspan="3" align="left">'
                       '<font color="cornflowerblue"><b>Sink Node'
                       '</b></font></td></tr>')

    def visit_environment(self, e, prev_e=None):
        self._dump('<table border="0">')

        def dump_location_context(lc, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><b>%s</b></td>'
                       '<td align="left" colspan="2">'
                       '<font color="gray60">%s </font>'
                       '%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          lc.caption, lc.decl,
                          ('(%s)' % self._make_sloc(lc.loc))
                          if lc.loc is not None else ''))

        def dump_binding(f, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left"><i>S%s</i></td>'
                       '%s'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          b.stmt_id,
                          '<td align="left"><font color="%s"><i>'
                          '%s</i></font></td>' % (
                              'lavender' if self._dark_mode else 'darkgreen',
                              ('(%s)' % b.kind) if b.kind is not None else ' '
                          ),
                          self._short_pretty(b.pretty), f.bindings[b]))

        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
        if frames_updated:
            for i in frames_updated:
                f = e.frames[i]
                prev_f = prev_e.frames[i]
                dump_location_context(f.location_context)
                bindings_removed, bindings_added = f.diff_bindings(prev_f)
                for b in bindings_removed:
                    dump_binding(prev_f, b, False)
                for b in bindings_added:
                    dump_binding(f, b, True)
        else:
            for f in e.frames:
                dump_location_context(f.location_context)
                for b in f.bindings:
                    dump_binding(f, b)

        self._dump('</table>')

    def visit_environment_in_state(self, selector, title, s, prev_s=None):
        e = getattr(s, selector)
        prev_e = getattr(prev_s, selector) if prev_s is not None else None
        if e is None and prev_e is None:
            return

        self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
        if e is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_e is not None:
                if e.is_different(prev_e):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_environment(e, prev_e)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_environment(e)

        self._dump('</td></tr>')

    def visit_store(self, s, prev_s=None):
        self._dump('<table border="0">')

        def dump_binding(s, c, b, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          s.clusters[c].base_region, b.offset,
                          '(<i>Default</i>)' if b.kind == 'Default'
                          else '',
                          s.clusters[c].bindings[b]))

        if prev_s is not None:
            clusters_removed, clusters_added, clusters_updated = \
                s.diff_clusters(prev_s)
            for c in clusters_removed:
                for b in prev_s.clusters[c].bindings:
                    dump_binding(prev_s, c, b, False)
            for c in clusters_updated:
                bindings_removed, bindings_added = \
                    s.clusters[c].diff_bindings(prev_s.clusters[c])
                for b in bindings_removed:
                    dump_binding(prev_s, c, b, False)
                for b in bindings_added:
                    dump_binding(s, c, b, True)
            for c in clusters_added:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b, True)
        else:
            for c in s.clusters:
                for b in s.clusters[c].bindings:
                    dump_binding(s, c, b)

        self._dump('</table>')

    def visit_store_in_state(self, s, prev_s=None):
        st = s.store
        prev_st = prev_s.store if prev_s is not None else None
        if st is None and prev_st is None:
            return

        self._dump('<hr /><tr><td align="left"><b>Store: </b>')
        if st is None:
            self._dump('<i> Nothing!</i>')
        else:
            if self._dark_mode:
                self._dump(' <font color="gray30">(%s)</font>' % st.ptr)
            else:
                self._dump(' <font color="gray">(%s)</font>' % st.ptr)
            if prev_st is not None:
                if s.store.is_different(prev_st):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_store(st, prev_st)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_store(st)
        self._dump('</td></tr>')

    def visit_generic_map(self, m, prev_m=None):
        self._dump('<table border="0">')

        def dump_pair(m, k, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added),
                          k, m.generic_map[k]))

        if prev_m is not None:
            removed, added = m.diff(prev_m)
            for k in removed:
                dump_pair(prev_m, k, False)
            for k in added:
                dump_pair(m, k, True)
        else:
            for k in m.generic_map:
                dump_pair(m, k, None)

        self._dump('</table>')

    def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
        m = getattr(s, selector)
        prev_m = getattr(prev_s, selector) if prev_s is not None else None
        if m is None and prev_m is None:
            return

        self._dump('<hr />')
        self._dump('<tr><td align="left">'
                   '<b>%s: </b>' % title)
        if m is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_m is not None:
                if m.is_different(prev_m):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_generic_map(m, prev_m)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_generic_map(m)

        self._dump('</td></tr>')

    def visit_checker_messages(self, m, prev_m=None):
        self._dump('<table border="0">')

        def dump_line(l, is_added=None):
            self._dump('<tr><td>%s</td>'
                       '<td align="left">%s</td></tr>'
                       % (self._diff_plus_minus(is_added), l))

        def dump_chk(chk, is_added=None):
            dump_line('<i>%s</i>:' % chk, is_added)

        if prev_m is not None:
            removed, added, updated = m.diff_messages(prev_m)
            for chk in removed:
                dump_chk(chk, False)
                for l in prev_m.items[chk].lines:
                    dump_line(l, False)
            for chk in updated:
                dump_chk(chk)
                for l in m.items[chk].diff_lines(prev_m.items[chk]):
                    dump_line(l[1:], l.startswith('+'))
            for chk in added:
                dump_chk(chk, True)
                for l in m.items[chk].lines:
                    dump_line(l, True)
        else:
            for chk in m.items:
                dump_chk(chk)
                for l in m.items[chk].lines:
                    dump_line(l)

        self._dump('</table>')

    def visit_checker_messages_in_state(self, s, prev_s=None):
        m = s.checker_messages
        prev_m = prev_s.checker_messages if prev_s is not None else None
        if m is None and prev_m is None:
            return

        self._dump('<hr />')
        self._dump('<tr><td align="left">'
                   '<b>Checker State: </b>')
        if m is None:
            self._dump('<i> Nothing!</i>')
        else:
            if prev_m is not None:
                if m.is_different(prev_m):
                    self._dump('</td></tr><tr><td align="left">')
                    self.visit_checker_messages(m, prev_m)
                else:
                    self._dump('<i> No changes!</i>')
            else:
                self._dump('</td></tr><tr><td align="left">')
                self.visit_checker_messages(m)

        self._dump('</td></tr>')

    def visit_state(self, s, prev_s):
        self.visit_store_in_state(s, prev_s)
        self.visit_environment_in_state('environment', 'Expressions',
                                        s, prev_s)
        self.visit_generic_map_in_state('constraints', 'Ranges',
                                        s, prev_s)
        self.visit_generic_map_in_state('dynamic_types', 'Dynamic Types',
                                        s, prev_s)
        self.visit_environment_in_state('constructing_objects',
                                        'Objects Under Construction',
                                        s, prev_s)
        self.visit_checker_messages_in_state(s, prev_s)

    def visit_node(self, node):
        self._dump('%s [shape=record,'
                   % (node.node_name()))
        if self._dark_mode:
            self._dump('color="white",fontcolor="gray80",')
        self._dump('label=<<table border="0">')

        self._dump('<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
                   % ("gray20" if self._dark_mode else "gray70",
                      node.state.state_id
                      if node.state is not None else 'Unspecified'))
        if not self._topo_mode:
            self._dump('<tr><td align="left" width="0">')
            if len(node.points) > 1:
                self._dump('<b>Program points:</b></td></tr>')
            else:
                self._dump('<b>Program point:</b></td></tr>')
        self._dump('<tr><td align="left" width="0">'
                   '<table border="0" align="left" width="0">')
        for p in node.points:
            self.visit_program_point(p)
        self._dump('</table></td></tr>')

        if node.state is not None and not self._topo_mode:
            prev_s = None
            # Do diffs only when we have a unique predecessor.
            # Don't do diffs on the leaf nodes because they're
            # the important ones.
            if self._do_diffs and len(node.predecessors) == 1 \
               and len(node.successors) > 0:
                prev_s = self._graph.nodes[node.predecessors[0]].state
            self.visit_state(node.state, prev_s)
        self._dump_raw('</table>>];\n')

    def visit_edge(self, pred, succ):
        self._dump_raw('%s -> %s%s;\n' % (
            pred.node_name(), succ.node_name(),
            ' [color="white"]' if self._dark_mode else ''
        ))

    def visit_end_of_graph(self):
        self._dump_raw('}\n')

        if not self._dump_dot_only:
            import sys
            import tempfile

            def write_temp_file(suffix, data):
                fd, filename = tempfile.mkstemp(suffix=suffix)
                print('Writing "%s"...' % filename)
                with os.fdopen(fd, 'w') as fp:
                    fp.write(data)
                print('Done! Please remember to remove the file.')
                return filename

            try:
                import graphviz
            except ImportError:
                # The fallback behavior if graphviz is not installed!
                print('Python graphviz not found. Please invoke')
                print('  $ pip install graphviz')
                print('in order to enable automatic conversion to HTML.')
                print()
                print('You may also convert DOT to SVG manually via')
                print('  $ dot -Tsvg input.dot -o output.svg')
                print()
                write_temp_file('.dot', self.output())
                return

            svg = graphviz.pipe('dot', 'svg', self.output())

            filename = write_temp_file(
                '.html', '<html><body bgcolor="%s">%s</body></html>' % (
                             '#1a1a1a' if self._dark_mode else 'white', svg))
            if sys.platform == 'win32':
                os.startfile(filename)
            elif sys.platform == 'darwin':
                os.system('open "%s"' % filename)
            else:
                os.system('xdg-open "%s"' % filename)


#===-----------------------------------------------------------------------===#
# Explorers know how to traverse the ExplodedGraph in a certain order.
# They would invoke a Visitor on every node or edge they encounter.
#===-----------------------------------------------------------------------===#


# BasicExplorer explores the whole graph in no particular order.
class BasicExplorer:
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node in sorted(graph.nodes):
            logging.debug('Visiting ' + node)
            visitor.visit_node(graph.nodes[node])
            for succ in sorted(graph.nodes[node].successors):
                logging.debug('Visiting edge: %s -> %s ' % (node, succ))
                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
        visitor.visit_end_of_graph()


#===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph so that to focus on other parts.
# Trimmers can be combined together by applying them sequentially.
#===-----------------------------------------------------------------------===#


# SinglePathTrimmer keeps only a single path - the leftmost path from the root.
# Useful when the trimmed graph is still too large.
class SinglePathTrimmer:
    def trim(self, graph):
        visited_nodes = set()
        node_id = graph.root_id
        while True:
            visited_nodes.add(node_id)
            node = graph.nodes[node_id]
            if len(node.successors) > 0:
                succ_id = node.successors[0]
                succ = graph.nodes[succ_id]
                node.successors = [succ_id]
                succ.predecessors = [node_id]
                if succ_id in visited_nodes:
                    break
                node_id = succ_id
            else:
                break
        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}


# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer:
    def __init__(self, target_nodes):
        self._target_nodes = target_nodes

    @staticmethod
    def parse_target_node(node, graph):
        if node.startswith('0x'):
            ret = 'Node' + node
            assert ret in graph.nodes
            return ret
        else:
            for other_id in graph.nodes:
                other = graph.nodes[other_id]
                if other.node_id == int(node):
                    return other_id

    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        return [TargetedTrimmer.parse_target_node(node, graph)
                for node in target_nodes.split(',')]

    def trim(self, graph):
        queue = self._target_nodes
        visited_nodes = set()

        while len(queue) > 0:
            node_id = queue.pop()
            visited_nodes.add(node_id)
            node = graph.nodes[node_id]
            for pred_id in node.predecessors:
                if pred_id not in visited_nodes:
                    queue.append(pred_id)
        graph.nodes = {node_id: graph.nodes[node_id]
                       for node_id in visited_nodes}
        for node_id in graph.nodes:
            node = graph.nodes[node_id]
            node.successors = [succ_id for succ_id in node.successors
                               if succ_id in visited_nodes]
            node.predecessors = [succ_id for succ_id in node.predecessors
                                 if succ_id in visited_nodes]


#===-----------------------------------------------------------------------===#
# The entry point to the script.
#===-----------------------------------------------------------------------===#


def main():
    parser = argparse.ArgumentParser(
        description='Display and manipulate Exploded Graph dumps.')
    parser.add_argument('filename', type=str,
                        help='the .dot file produced by the Static Analyzer')
    parser.add_argument('-v', '--verbose', action='store_const',
                        dest='loglevel', const=logging.DEBUG,
                        default=logging.WARNING,
                        help='enable info prints')
    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
                        const=True, default=False,
                        help='display differences between states')
    parser.add_argument('-t', '--topology', action='store_const',
                        dest='topology', const=True, default=False,
                        help='only display program points, omit states')
    parser.add_argument('-s', '--single-path', action='store_const',
                        dest='single_path', const=True, default=False,
                        help='only display the leftmost path in the graph '
                             '(useful for trimmed graphs that still '
                             'branch too much)')
    parser.add_argument('--to', type=str, default=None,
                        help='only display execution paths from the root '
                             'to the given comma-separated list of nodes '
                             'identified by a pointer or a stable ID; '
                             'compatible with --single-path')
    parser.add_argument('--dark', action='store_const', dest='dark',
                        const=True, default=False,
                        help='dark mode')
    parser.add_argument('--gray', action='store_const', dest='gray',
                        const=True, default=False,
                        help='black-and-white mode')
    parser.add_argument('--dump-dot-only', action='store_const',
                        dest='dump_dot_only', const=True, default=False,
                        help='instead of writing an HTML file and immediately '
                             'displaying it, dump the rewritten dot file '
                             'to stdout')
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    graph = ExplodedGraph()
    with open(args.filename) as fd:
        for raw_line in fd:
            raw_line = raw_line.strip()
            graph.add_raw_line(raw_line)

    trimmers = []
    if args.to is not None:
        trimmers.append(TargetedTrimmer(
            TargetedTrimmer.parse_target_nodes(args.to, graph)))
    if args.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()

    visitor = DotDumpVisitor(args.diff, args.dark, args.gray, args.topology,
                             args.dump_dot_only)

    for trimmer in trimmers:
        trimmer.trim(graph)

    explorer.explore(graph, visitor)


if __name__ == '__main__':
    main()