Heuristic.py 18.7 KB

Raw Blame History Permalink

# DExTer : Debugging Experience Tester
# ~~~~~~   ~         ~~         ~   ~~
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Calculate a 'score' based on some dextIR.
Assign penalties based on different commands to decrease the score.
1.000 would be a perfect score.
0.000 is the worst theoretical score possible.
"""

from collections import defaultdict, namedtuple, Counter
import difflib
import os
from itertools import groupby
from dex.command.StepValueInfo import StepValueInfo


PenaltyCommand = namedtuple('PenaltyCommand', ['pen_dict', 'max_penalty'])
# 'meta' field used in different ways by different things
PenaltyInstance = namedtuple('PenaltyInstance', ['meta', 'the_penalty'])


def add_heuristic_tool_arguments(parser):
    parser.add_argument(
        '--penalty-variable-optimized',
        type=int,
        default=3,
        help='set the penalty multiplier for each'
        ' occurrence of a variable that was optimized'
        ' away',
        metavar='<int>')
    parser.add_argument(
        '--penalty-misordered-values',
        type=int,
        default=3,
        help='set the penalty multiplier for each'
        ' occurrence of a misordered value.',
        metavar='<int>')
    parser.add_argument(
        '--penalty-irretrievable',
        type=int,
        default=4,
        help='set the penalty multiplier for each'
        " occurrence of a variable that couldn't"
        ' be retrieved',
        metavar='<int>')
    parser.add_argument(
        '--penalty-not-evaluatable',
        type=int,
        default=5,
        help='set the penalty multiplier for each'
        " occurrence of a variable that couldn't"
        ' be evaluated',
        metavar='<int>')
    parser.add_argument(
        '--penalty-missing-values',
        type=int,
        default=6,
        help='set the penalty multiplier for each missing'
        ' value',
        metavar='<int>')
    parser.add_argument(
        '--penalty-incorrect-values',
        type=int,
        default=7,
        help='set the penalty multiplier for each'
        ' occurrence of an unexpected value.',
        metavar='<int>')
    parser.add_argument(
        '--penalty-unreachable',
        type=int,
        default=4,  # XXX XXX XXX selected by random
        help='set the penalty for each line stepped onto that should'
        ' have been unreachable.',
        metavar='<int>')
    parser.add_argument(
        '--penalty-misordered-steps',
        type=int,
        default=2,  # XXX XXX XXX selected by random
        help='set the penalty for differences in the order of steps'
        ' the program was expected to observe.',
        metavar='<int>')
    parser.add_argument(
        '--penalty-missing-step',
        type=int,
        default=4,  # XXX XXX XXX selected by random
        help='set the penalty for the program skipping over a step.',
        metavar='<int>')
    parser.add_argument(
        '--penalty-incorrect-program-state',
        type=int,
        default=4,  # XXX XXX XXX selected by random
        help='set the penalty for the program never entering an expected state'
        ' or entering an unexpected state.',
        metavar='<int>')


class Heuristic(object):
    def __init__(self, context, steps):
        self.context = context
        self.penalties = {}

        worst_penalty = max([
            self.penalty_variable_optimized, self.penalty_irretrievable,
            self.penalty_not_evaluatable, self.penalty_incorrect_values,
            self.penalty_missing_values, self.penalty_unreachable,
            self.penalty_missing_step, self.penalty_misordered_steps
        ])

        # Get DexExpectWatchType results.
        try:
            for command in steps.commands['DexExpectWatchType']:
                command.eval(steps)
                maximum_possible_penalty = min(3, len(
                    command.values)) * worst_penalty
                name, p = self._calculate_expect_watch_penalties(
                    command, maximum_possible_penalty)
                name = name + ' ExpectType'
                self.penalties[name] = PenaltyCommand(p,
                                                      maximum_possible_penalty)
        except KeyError:
            pass

        # Get DexExpectWatchValue results.
        try:
            for command in steps.commands['DexExpectWatchValue']:
                command.eval(steps)
                maximum_possible_penalty = min(3, len(
                    command.values)) * worst_penalty
                name, p = self._calculate_expect_watch_penalties(
                    command, maximum_possible_penalty)
                name = name + ' ExpectValue'
                self.penalties[name] = PenaltyCommand(p,
                                                      maximum_possible_penalty)
        except KeyError:
            pass

        try:
            penalties = defaultdict(list)
            maximum_possible_penalty_all = 0
            for expect_state in steps.commands['DexExpectProgramState']:
                success = expect_state.eval(steps)
                p = 0 if success else self.penalty_incorrect_program_state

                meta = 'expected {}: {}'.format(
                    '{} times'.format(expect_state.times)
                        if expect_state.times >= 0 else 'at least once',
                    expect_state.program_state_text)

                if success:
                    meta = '<g>{}</>'.format(meta)

                maximum_possible_penalty = self.penalty_incorrect_program_state
                maximum_possible_penalty_all += maximum_possible_penalty
                name = expect_state.program_state_text
                penalties[meta] = [PenaltyInstance('{} times'.format(
                    len(expect_state.encounters)), p)]
            self.penalties['expected program states'] = PenaltyCommand(
                penalties, maximum_possible_penalty_all)
        except KeyError:
            pass

        # Get the total number of each step kind.
        step_kind_counts = defaultdict(int)
        for step in getattr(steps, 'steps'):
            step_kind_counts[step.step_kind] += 1

        # Get DexExpectStepKind results.
        penalties = defaultdict(list)
        maximum_possible_penalty_all = 0
        try:
            for command in steps.commands['DexExpectStepKind']:
                command.eval()
                # Cap the penalty at 2 * expected count or else 1
                maximum_possible_penalty = max(command.count * 2, 1)
                p = abs(command.count - step_kind_counts[command.name])
                actual_penalty = min(p, maximum_possible_penalty)
                key = ('{}'.format(command.name)
                       if actual_penalty else '<g>{}</>'.format(command.name))
                penalties[key] = [PenaltyInstance(p, actual_penalty)]
                maximum_possible_penalty_all += maximum_possible_penalty
            self.penalties['step kind differences'] = PenaltyCommand(
                penalties, maximum_possible_penalty_all)
        except KeyError:
            pass

        if 'DexUnreachable' in steps.commands:
            cmds = steps.commands['DexUnreachable']
            unreach_count = 0

            # Find steps with unreachable in them
            ureachs = [
                s for s in steps.steps if 'DexUnreachable' in s.watches.keys()
            ]

            # There's no need to match up cmds with the actual watches
            upen = self.penalty_unreachable

            count = upen * len(ureachs)
            if count != 0:
                d = dict()
                for x in ureachs:
                    msg = 'line {} reached'.format(x.current_location.lineno)
                    d[msg] = [PenaltyInstance(upen, upen)]
            else:
                d = {
                    '<g>No unreachable lines seen</>': [PenaltyInstance(0, 0)]
                }
            total = PenaltyCommand(d, len(cmds) * upen)

            self.penalties['unreachable lines'] = total

        if 'DexExpectStepOrder' in steps.commands:
            cmds = steps.commands['DexExpectStepOrder']

            # Form a list of which line/cmd we _should_ have seen
            cmd_num_lst = [(x, c.lineno) for c in cmds
                                         for x in c.sequence]
            # Order them by the sequence number
            cmd_num_lst.sort(key=lambda t: t[0])
            # Strip out sequence key
            cmd_num_lst = [y for x, y in cmd_num_lst]

            # Now do the same, but for the actually observed lines/cmds
            ss = steps.steps
            deso = [s for s in ss if 'DexExpectStepOrder' in s.watches.keys()]
            deso = [s.watches['DexExpectStepOrder'] for s in deso]
            # We rely on the steps remaining in order here
            order_list = [int(x.expression) for x in deso]

            # First off, check to see whether or not there are missing items
            expected = Counter(cmd_num_lst)
            seen = Counter(order_list)

            unseen_line_dict = dict()
            skipped_line_dict = dict()

            mispen = self.penalty_missing_step
            num_missing = 0
            num_repeats = 0
            for k, v in expected.items():
                if k not in seen:
                    msg = 'Line {} not seen'.format(k)
                    unseen_line_dict[msg] = [PenaltyInstance(mispen, mispen)]
                    num_missing += v
                elif v > seen[k]:
                    msg = 'Line {} skipped at least once'.format(k)
                    skipped_line_dict[msg] = [PenaltyInstance(mispen, mispen)]
                    num_missing += v - seen[k]
                elif v < seen[k]:
                    # Don't penalise unexpected extra sightings of a line
                    # for now
                    num_repeats = seen[k] - v
                    pass

            if len(unseen_line_dict) == 0:
                pi = PenaltyInstance(0, 0)
                unseen_line_dict['<g>All lines were seen</>'] = [pi]

            if len(skipped_line_dict) == 0:
                pi = PenaltyInstance(0, 0)
                skipped_line_dict['<g>No lines were skipped</>'] = [pi]

            total = PenaltyCommand(unseen_line_dict, len(expected) * mispen)
            self.penalties['Unseen lines'] = total
            total = PenaltyCommand(skipped_line_dict, len(expected) * mispen)
            self.penalties['Skipped lines'] = total

            ordpen = self.penalty_misordered_steps
            cmd_num_lst = [str(x) for x in cmd_num_lst]
            order_list = [str(x) for x in order_list]
            lst = list(difflib.Differ().compare(cmd_num_lst, order_list))
            diff_detail = Counter(l[0] for l in lst)

            assert '?' not in diff_detail

            # Diffs are hard to interpret; there are many algorithms for
            # condensing them. Ignore all that, and just print out the changed
            # sequences, it's up to the user to interpret what's going on.

            def filt_lines(s, seg, e, key):
                lst = [s]
                for x in seg:
                    if x[0] == key:
                        lst.append(int(x[2:]))
                lst.append(e)
                return lst

            diff_msgs = dict()

            def reportdiff(start_idx, segment, end_idx):
                msg = 'Order mismatch, expected linenos {}, saw {}'
                expected_linenos = filt_lines(start_idx, segment, end_idx, '-')
                seen_linenos = filt_lines(start_idx, segment, end_idx, '+')
                msg = msg.format(expected_linenos, seen_linenos)
                diff_msgs[msg] = [PenaltyInstance(ordpen, ordpen)]

            # Group by changed segments.
            start_expt_step = 0
            end_expt_step = 0
            to_print_lst = []
            for k, subit in groupby(lst, lambda x: x[0] == ' '):
                if k:  # Whitespace group
                    nochanged = [x for x in subit]
                    end_expt_step = int(nochanged[0][2:])
                    if len(to_print_lst) > 0:
                        reportdiff(start_expt_step, to_print_lst,
                                   end_expt_step)
                    start_expt_step = int(nochanged[-1][2:])
                    to_print_lst = []
                else:  # Diff group, save for printing
                    to_print_lst = [x for x in subit]

            # If there was a dangling different step, print that too.
            if len(to_print_lst) > 0:
                reportdiff(start_expt_step, to_print_lst, '[End]')

            if len(diff_msgs) == 0:
                diff_msgs['<g>No lines misordered</>'] = [
                    PenaltyInstance(0, 0)
                ]
            total = PenaltyCommand(diff_msgs, len(cmd_num_lst) * ordpen)
            self.penalties['Misordered lines'] = total

        return

    def _calculate_expect_watch_penalties(self, c, maximum_possible_penalty):
        penalties = defaultdict(list)

        if c.line_range[0] == c.line_range[-1]:
            line_range = str(c.line_range[0])
        else:
            line_range = '{}-{}'.format(c.line_range[0], c.line_range[-1])

        name = '{}:{} [{}]'.format(
            os.path.basename(c.path), line_range, c.expression)

        num_actual_watches = len(c.expected_watches) + len(
            c.unexpected_watches)

        penalty_available = maximum_possible_penalty

        # Only penalize for missing values if we have actually seen a watch
        # that's returned us an actual value at some point, or if we've not
        # encountered the value at all.
        if num_actual_watches or c.times_encountered == 0:
            for v in c.missing_values:
                current_penalty = min(penalty_available,
                                      self.penalty_missing_values)
                penalty_available -= current_penalty
                penalties['missing values'].append(
                    PenaltyInstance(v, current_penalty))

        for v in c.encountered_values:
            penalties['<g>expected encountered watches</>'].append(
                PenaltyInstance(v, 0))

        penalty_descriptions = [
            (self.penalty_not_evaluatable, c.invalid_watches,
             'could not evaluate'),
            (self.penalty_variable_optimized, c.optimized_out_watches,
             'result optimized away'),
            (self.penalty_misordered_values, c.misordered_watches,
             'misordered result'),
            (self.penalty_irretrievable, c.irretrievable_watches,
             'result could not be retrieved'),
            (self.penalty_incorrect_values, c.unexpected_watches,
             'unexpected result'),
        ]

        for penalty_score, watches, description in penalty_descriptions:
            # We only penalize the encountered issue for each missing value per
            # command but we still want to record each one, so set the penalty
            # to 0 after the threshold is passed.
            times_to_penalize = len(c.missing_values)

            for w in watches:
                times_to_penalize -= 1
                penalty_score = min(penalty_available, penalty_score)
                penalty_available -= penalty_score
                penalties[description].append(
                    PenaltyInstance(w, penalty_score))
                if not times_to_penalize:
                    penalty_score = 0

        return name, penalties

    @property
    def penalty(self):
        result = 0

        maximum_allowed_penalty = 0
        for name, pen_cmd in self.penalties.items():
            maximum_allowed_penalty += pen_cmd.max_penalty
            value = pen_cmd.pen_dict
            for category, inst_list in value.items():
                result += sum(x.the_penalty for x in inst_list)
        return min(result, maximum_allowed_penalty)

    @property
    def max_penalty(self):
        return sum(p_cat.max_penalty for p_cat in self.penalties.values())

    @property
    def score(self):
        try:
            return 1.0 - (self.penalty / float(self.max_penalty))
        except ZeroDivisionError:
            return float('nan')

    @property
    def summary_string(self):
        score = self.score
        isnan = score != score  # pylint: disable=comparison-with-itself
        color = 'g'
        if score < 0.25 or isnan:
            color = 'r'
        elif score < 0.75:
            color = 'y'

        return '<{}>({:.4f})</>'.format(color, score)

    @property
    def verbose_output(self):  # noqa
        string = ''
        string += ('\n')
        for command in sorted(self.penalties):
            pen_cmd = self.penalties[command]
            maximum_possible_penalty = pen_cmd.max_penalty
            total_penalty = 0
            lines = []
            for category in sorted(pen_cmd.pen_dict):
                lines.append('    <r>{}</>:\n'.format(category))

                for result, penalty in pen_cmd.pen_dict[category]:
                    if isinstance(result, StepValueInfo):
                        text = 'step {}'.format(result.step_index)
                        if result.expected_value:
                            text += ' ({})'.format(result.expected_value)
                    else:
                        text = str(result)
                    if penalty:
                        assert penalty > 0, penalty
                        total_penalty += penalty
                        text += ' <r>[-{}]</>'.format(penalty)
                    lines.append('      {}\n'.format(text))

                lines.append('\n')

            string += ('  <b>{}</> <y>[{}/{}]</>\n'.format(
                command, total_penalty, maximum_possible_penalty))
            for line in lines:
                string += (line)
        string += ('\n')
        return string

    @property
    def penalty_variable_optimized(self):
        return self.context.options.penalty_variable_optimized

    @property
    def penalty_irretrievable(self):
        return self.context.options.penalty_irretrievable

    @property
    def penalty_not_evaluatable(self):
        return self.context.options.penalty_not_evaluatable

    @property
    def penalty_incorrect_values(self):
        return self.context.options.penalty_incorrect_values

    @property
    def penalty_missing_values(self):
        return self.context.options.penalty_missing_values

    @property
    def penalty_misordered_values(self):
        return self.context.options.penalty_misordered_values

    @property
    def penalty_unreachable(self):
        return self.context.options.penalty_unreachable

    @property
    def penalty_missing_step(self):
        return self.context.options.penalty_missing_step

    @property
    def penalty_misordered_steps(self):
        return self.context.options.penalty_misordered_steps

    @property
    def penalty_incorrect_program_state(self):
        return self.context.options.penalty_incorrect_program_state