perf-helper.py 14 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
#===- perf-helper.py - Clang Python Bindings -----------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

from __future__ import absolute_import, division, print_function

import argparse
import bisect
import os
import random
import shlex
import subprocess
import sys
import tempfile
import time

# Environment handed to the compiler when it is probed for its cc1 command.
# Only PATH is carried over from the calling process.
test_env = dict(PATH=os.environ['PATH'])

def findFilesWithExtension(path, extension):
  """Collect every file under `path` whose name ends with `extension`.

  The tree rooted at `path` is walked recursively with os.walk. `extension`
  is compared with str.endswith, so it is a plain suffix test and no leading
  '.' is implied.

  Returns a list of paths, each built by joining the directory that holds
  the file with the file's name.
  """
  return [os.path.join(parent, entry)
          for parent, _, entries in os.walk(path)
          for entry in entries
          if entry.endswith(extension)]

def clean(args):
  """Delete every file under a directory whose name ends with an extension.

  `args` holds the command-line arguments that follow 'clean' and must be
  exactly [<path>, <extension>].

  Returns 0 once the matching files have been removed, or 1 (after printing
  a usage message) when the number of arguments is wrong.
  """
  if len(args) == 2:
    path, extension = args
    for stale in findFilesWithExtension(path, extension):
      os.remove(stale)
    return 0

  usage = ('Usage: %s clean <path> <extension>\n' % __file__ +
    '\tRemoves all files with extension from <path>.')
  print(usage)
  return 1

def merge(args):
  """Merge the raw profiles found under a directory into a single profile.

  `args` holds the command-line arguments that follow 'merge' and must be
  exactly [<llvm-profdata>, <output>, <path>]. Every file under <path> whose
  name ends with "profraw" is passed to '<llvm-profdata> merge -o <output>'.

  Returns 0 on success, or 1 (after printing a usage message) when the
  number of arguments is wrong. Raises subprocess.CalledProcessError if the
  llvm-profdata command exits with a non-zero status.
  """
  if len(args) != 3:
    # The usage text must name this sub-command ('merge'), not 'clean'.
    print('Usage: %s merge <llvm-profdata> <output> <path>\n' % __file__ +
      '\tMerges all profraw files from path into output.')
    return 1
  cmd = [args[0], 'merge', '-o', args[1]]
  cmd.extend(findFilesWithExtension(args[2], "profraw"))
  subprocess.check_call(cmd)
  return 0

def dtrace(args):
  """Run a command under dtrace, logging function-entry probes to a file.

  `args` holds the command-line arguments that follow 'dtrace': zero or more
  leading '--' options (defined on the parser below) and then the command to
  profile. The command is taken to start at the first argument that does not
  begin with '--', so option values have to be attached with '=' (for
  example --buffer-size=4).

  dtrace's standard output is written to '<pid>.dtrace' in the current
  directory, where <pid> is the id of this Python process; its standard
  error is discarded.

  Returns 0 on success. Exits through argparse's error path (status 2) when
  an option is invalid or no command is given, and raises
  subprocess.CalledProcessError if dtrace exits with a non-zero status.
  """
  parser = argparse.ArgumentParser(prog='perf-helper dtrace',
    description='dtrace wrapper for order file generation')
  parser.add_argument('--buffer-size', metavar='size', type=int, required=False,
    default=1, help='dtrace buffer size in MB (default 1)')
  parser.add_argument('--use-oneshot', required=False, action='store_true',
    help='Use dtrace\'s oneshot probes')
  parser.add_argument('--use-ustack', required=False, action='store_true',
    help='Use dtrace\'s ustack to print function names')
  parser.add_argument('--cc1', required=False, action='store_true',
    help='Execute cc1 directly (don\'t profile the driver)')
  parser.add_argument('cmd', nargs='*', help='')

  # Use python's arg parser to handle all leading option arguments, but pass
  # everything else through to dtrace. When there is no command at all, every
  # argument is an option, so parse them all and then report the problem
  # instead of failing with a bare StopIteration.
  first_cmd = next((arg for arg in args if not arg.startswith("--")), None)
  last_arg_idx = len(args) if first_cmd is None else args.index(first_cmd)

  opts = parser.parse_args(args[:last_arg_idx])
  cmd = args[last_arg_idx:]
  if not cmd:
    parser.error('no command to profile was given')

  if opts.cc1:
    cmd = get_cc1_command_for_args(cmd, test_env)

  if opts.use_oneshot:
    target = "oneshot$target:::entry"
  else:
    target = "pid$target:::entry"
  # Only fire for probes whose module name matches the profiled executable.
  predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
  log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
  if opts.use_ustack:
    action = 'ustack(1);'
  else:
    action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
  dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

  dtrace_args = []
  if not os.geteuid() == 0:
    # No %-formatting is applied to this message, so the sudoers group prefix
    # is written as a single '%'.
    print(
      'Script must be run as root, or you must add the following to your sudoers:\n'
      + '%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace')
    dtrace_args.append("sudo")

  dtrace_args.extend((
      'dtrace', '-xevaltime=exec',
      '-xbufsize=%dm' % (opts.buffer_size),
      '-q', '-n', dtrace_script,
      '-c', ' '.join(cmd)))

  if sys.platform == "darwin":
    dtrace_args.append('-xmangled')

  start_time = time.time()

  with open("%d.dtrace" % os.getpid(), "w") as f, \
       open(os.devnull, "w") as devnull:
    # Terminate and flush the header before the child starts writing to the
    # same file, so it stays on its own line at the top of the log.
    f.write("### Command: %s\n" % dtrace_args)
    f.flush()
    # stderr was previously an unread pipe, which can block the child once
    # the pipe fills; send it to the null device to keep it hidden safely.
    subprocess.check_call(dtrace_args, stdout=f, stderr=devnull)

  elapsed = time.time() - start_time
  print("... data collection took %.4fs" % elapsed)

  return 0

def get_cc1_command_for_args(cmd, env):
  """Return the cc1 command line the compiler would run for `cmd`.

  `cmd` is the compiler command as a list of strings. It is executed with
  '-###' appended (the caller's list is not modified), using `env` as the
  child's environment, and its combined stdout/stderr is scanned. Lines that
  are known informational output are dropped; exactly one line must remain.

  Returns that line split into an argument list with shlex.split.

  Prints a fatal error and exits with status 1 if the number of remaining
  lines is not one or the line splits to nothing. Raises
  subprocess.CalledProcessError if the compiler exits with a non-zero status.
  """
  # Find the cc1 command used by the compiler. To do this we execute the
  # compiler with '-###' to figure out what it wants to do.
  cc_output = subprocess.check_output(
      cmd + ['-###'], stderr=subprocess.STDOUT, env=env,
      universal_newlines=True).strip()

  # Filter out known garbage.
  noise_prefixes = (
      'Configured with:',
      'Target:',
      'Thread model:',
      'InstalledDir:',
      'LLVM Profile Note',
      ' (in-process)',
  )
  cc_commands = [
      ln for ln in cc_output.split('\n')
      if not (ln == 'Using built-in specs.' or
              ln.startswith(noise_prefixes) or
              ' version ' in ln)]

  if len(cc_commands) != 1:
    print('Fatal error: unable to determine cc1 command: %r' % cc_output)
    # sys.exit rather than the site-provided exit() helper, which is not
    # guaranteed to exist in every interpreter configuration.
    sys.exit(1)

  cc1_cmd = shlex.split(cc_commands[0])
  if not cc1_cmd:
    print('Fatal error: unable to determine cc1 command: %r' % cc_output)
    sys.exit(1)

  return cc1_cmd

def cc1(args):
  """Run the cc1 command that the given compiler invocation would execute.

  `args` holds the command-line arguments that follow 'cc1': optional
  leading '--' arguments and then the compiler command. The command is taken
  to start at the first argument that does not begin with '--'.

  Returns 0 on success. Exits through argparse's error path (status 2) when
  a leading option is not recognised or no command is given, and raises
  subprocess.CalledProcessError if the cc1 command exits non-zero.
  """
  parser = argparse.ArgumentParser(prog='perf-helper cc1',
    description='cc1 wrapper for order file generation')
  parser.add_argument('cmd', nargs='*', help='')

  # Use python's arg parser to handle all leading option arguments, but pass
  # everything else through to the compiler. With no command at all, every
  # argument is an option; parse them and then report the missing command
  # instead of failing with a bare StopIteration.
  first_cmd = next((arg for arg in args if not arg.startswith("--")), None)
  last_arg_idx = len(args) if first_cmd is None else args.index(first_cmd)

  # The parsed result is not needed; this call only validates the options.
  parser.parse_args(args[:last_arg_idx])
  cmd = args[last_arg_idx:]
  if not cmd:
    parser.error('no compiler command was given')

  # clear the profile file env, so that we don't generate profdata
  # when capturing the cc1 command. Work on a copy so the module-level
  # test_env is not modified as a side effect.
  cc1_env = dict(test_env)
  cc1_env["LLVM_PROFILE_FILE"] = os.devnull
  cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)

  subprocess.check_call(cc1_cmd)
  return 0

def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                             missing_symbols, opts):
  """Yield (timestamp, symbol) pairs parsed from one dtrace log file.

  Only lines starting with "dtrace-TS: " or "dtrace-Symbol: " are used; every
  other line is ignored. Each symbol is paired with the most recent valid
  timestamp, which is None until the first one has been seen.

  Parameters:
    path: log file to read.
    all_symbols: sorted list of the symbols in the binary (may be empty).
    all_symbols_set: the same symbols as a set; when empty, every symbol
      read from the log is accepted as-is.
    missing_symbols: set that is updated with each truncated name that was
      resolved by prefix, when opts.show_missing_symbols is true.
    opts: object whose show_missing_symbols attribute enables the
      "resolved missing symbol" warnings.

  A name that is not in all_symbols_set is treated as a truncated prefix:
  every known symbol starting with it is yielded, unless there are none or
  more than 100 of them, in which case the name is dropped.
  """
  def fix_mangling(symbol):
    if sys.platform == "darwin":
      if symbol[0] != '_' and symbol != 'start':
        symbol = '_' + symbol
    return symbol

  def get_symbols_with_prefix(symbol):
    # Walk forward from the insertion point by index, so that no copy of the
    # tail of all_symbols is made for every lookup.
    index = bisect.bisect_left(all_symbols, symbol)
    while index < len(all_symbols) and all_symbols[index].startswith(symbol):
      yield all_symbols[index]
      index += 1

  # Extract the list of symbols from the given file, which is assumed to be
  # the output of a dtrace run logging either probefunc or ustack(1) and
  # nothing else.
  # NOTE(review): the earlier comment here said the dtrace -xdemangle option
  # needs to be used, while dtrace() in this file passes -xmangled on
  # darwin -- confirm which one is intended.
  #
  # This is particular to OS X at the moment, because of the '_' handling.
  with open(path) as f:
    current_timestamp = None
    for ln in f:
      # Drop leading and trailing whitespace.
      ln = ln.strip()
      if not ln.startswith("dtrace-"):
        continue

      # If this is a timestamp specifier, extract it.
      if ln.startswith("dtrace-TS: "):
        _, data = ln.split(': ', 1)
        if not data.isdigit():
          print("warning: unrecognized timestamp line %r, ignoring" % ln,
            file=sys.stderr)
          continue
        current_timestamp = int(data)
        continue
      elif ln.startswith("dtrace-Symbol: "):

        _, ln = ln.split(': ', 1)
        if not ln:
          continue

        # If there is a '`' in the line, assume it is a ustack(1) entry in
        # the form of <modulename>`<modulefunc>, where <modulefunc> is never
        # truncated (but does need the mangling patched).
        if '`' in ln:
          func = ln.split('`', 1)[1]
          # Nothing after the '`' means there is no function name to record
          # (and fix_mangling would fail on its first-character test).
          if not func:
            continue
          yield (current_timestamp, fix_mangling(func))
          continue

        # Otherwise, assume this is a probefunc printout. DTrace on OS X
        # seems to have a bug where it prints the mangled version of symbols
        # which aren't C++ mangled. We just add a '_' to anything but start
        # which doesn't already have a '_'.
        symbol = fix_mangling(ln)

        # If we don't know all the symbols, or the symbol is one of them,
        # just return it.
        if not all_symbols_set or symbol in all_symbols_set:
          yield (current_timestamp, symbol)
          continue

        # Otherwise, we have a symbol name which isn't present in the
        # binary. We assume it is truncated, and try to extend it.

        # Get all the symbols with this prefix.
        possible_symbols = list(get_symbols_with_prefix(symbol))
        if not possible_symbols:
          continue

        # If we found too many possible symbols, ignore this as a prefix.
        if len(possible_symbols) > 100:
          print("warning: ignoring symbol %r " % symbol +
            "(no match and too many possible suffixes)", file=sys.stderr)
          continue

        # Report that we resolved a missing symbol.
        if opts.show_missing_symbols and symbol not in missing_symbols:
          print("warning: resolved missing symbol %r" % symbol, file=sys.stderr)
          missing_symbols.add(symbol)

        # Otherwise, treat all the possible matches as having occurred. This
        # is an over-approximation, but it should be ok in practice.
        for s in possible_symbols:
          yield (current_timestamp, s)

def uniq(list):
  """Lazily yield the items of `list`, skipping any item already yielded.

  `list` may be any iterable; its items are stored in a set, so they must be
  hashable. The first occurrence of each item is kept, in input order.
  """
  emitted = set()
  for candidate in list:
    if candidate in emitted:
      continue
    yield candidate
    emitted.add(candidate)

def form_by_call_order(symbol_lists):
  """Order symbols by first occurrence, taking the runs one after another.

  This is the simple strategy: the symbol lists are concatenated in the
  order given and duplicates after the first occurrence are dropped.
  Returns a generator.
  """
  def in_occurrence_order():
    for run in symbol_lists:
      for symbol in run:
        yield symbol

  return uniq(in_occurrence_order())

def form_by_call_order_fair(symbol_lists):
  """Order symbols by occurrence while pulling each symbol's successors up.

  A more involved strategy that tries to respect the call order across all
  of the test cases rather than strongly favouring the first one.
  `symbol_lists` is iterated twice, so it must be re-iterable. Returns a
  generator.
  """
  # For every symbol, record the distinct symbols that directly follow it in
  # any of the de-duplicated runs, in the order they were first seen.
  succs = {}
  for run in symbol_lists:
    deduped = list(uniq(run))
    for current, following in zip(deduped, deduped[1:]):
      followers = succs.setdefault(current, [])
      if following not in followers:
        followers.append(following)

  # Emit all the symbols, but whenever a symbol is seen also emit every
  # successor recorded for it from any run.
  #
  # There isn't much science here, but this sometimes works better than the
  # more naive strategy. Then again, sometimes it doesn't, so more research
  # is probably needed.
  def with_successors():
    for run in symbol_lists:
      for node in run:
        yield node
        for follower in succs.get(node, []):
          yield follower

  return uniq(with_successors())
 
def form_by_frequency(symbol_lists):
  """Return the distinct symbols, most frequently occurring first.

  Occurrences are counted across all runs. This assumes the data files
  didn't use the oneshot dtrace method. Symbols with equal counts keep the
  order in which the counting dictionary yields them.
  """
  tally = {}
  for run in symbol_lists:
    for symbol in run:
      if symbol in tally:
        tally[symbol] += 1
      else:
        tally[symbol] = 1

  # A reversed sort stays stable, so ties are left in their incoming order.
  ranked = sorted(tally.items(), key=lambda entry: entry[1], reverse=True)
  return [symbol for symbol, _ in ranked]
 
def form_by_random(symbol_lists):
  """Return the distinct symbols from all runs in a random order.

  The result is a new list; the order is produced by random.shuffle and so
  differs between calls unless the random module has been seeded.
  """
  # uniq() returns a generator, but random.shuffle needs a mutable sequence
  # to shuffle in place, so materialise the symbols into a list first.
  merged_symbols = list(uniq(s for symbols in symbol_lists
                               for s in symbols))
  random.shuffle(merged_symbols)
  return merged_symbols
 
def form_by_alphabetical(symbol_lists):
  """Return the distinct symbols from all runs as a sorted list."""
  distinct = set()
  for run in symbol_lists:
    distinct.update(run)
  return sorted(distinct)

# Registry of order-file strategies, keyed by the part of each function name
# that follows "form_by_" (for example "call_order" for form_by_call_order).
# It is built from the names bound in this scope at this point in the file.
methods = {
  name[len("form_by_"):]: value
  for name, value in locals().items()
  if name.startswith("form_by_")
}

def genOrderFile(args):
  """Build an order file from the dtrace logs found under the input paths.

  `args` holds the command-line arguments that follow 'gen-order-file'; the
  accepted options are defined on the parser below. Every file under the
  input directories whose name ends with "dtrace" is parsed, the symbols of
  each file are sorted by (timestamp, symbol), and the strategy selected by
  --method turns those per-file lists into the final order, which is written
  one symbol per line to --output.

  Returns 0. Exits through argparse (status 2) on invalid arguments and
  raises subprocess.CalledProcessError if 'nm' fails on --binary.
  """
  # argparse takes the program name separately from the usage text; the
  # optparse-style "%prog ..." string is not expanded by argparse.
  parser = argparse.ArgumentParser(prog='perf-helper gen-order-file',
    description='Generate an order file from dtrace data files')
  parser.add_argument('input', nargs='+',
    help='directories containing dtrace data files')
  parser.add_argument("--binary", metavar="PATH", type=str, dest="binary_path",
    help="Path to the binary being ordered (for getting all symbols)",
    default=None)
  parser.add_argument("--output", dest="output_path",
    help="path to output order file to write", default=None, required=True,
    metavar="PATH")
  parser.add_argument("--show-missing-symbols", dest="show_missing_symbols",
    help="show symbols which are 'fixed up' to a valid name (requires --binary)",
    action="store_true", default=None)
  parser.add_argument("--output-unordered-symbols",
    dest="output_unordered_symbols_path",
    help="write a list of the unordered symbols to PATH (requires --binary)",
    default=None, metavar="PATH")
  parser.add_argument("--method", dest="method",
    help="order file generation method to use", choices=list(methods.keys()),
    default='call_order')
  opts = parser.parse_args(args)

  # If the user gave us a binary, get all the symbols in the binary by
  # snarfing 'nm' output.
  if opts.binary_path is not None:
    output = subprocess.check_output(['nm', '-P', opts.binary_path], universal_newlines=True)
    lines = output.split("\n")
    # The symbol name is the first space-separated field of each line.
    all_symbols = [ln.split(' ',1)[0]
                   for ln in lines
                   if ln.strip()]
    print("found %d symbols in binary" % len(all_symbols))
    # Kept sorted because the prefix lookup in the parser uses bisect.
    all_symbols.sort()
  else:
    all_symbols = []
  all_symbols_set = set(all_symbols)

  # Compute the list of input files.
  input_files = []
  for dirname in opts.input:
    input_files.extend(findFilesWithExtension(dirname, "dtrace"))

  # Load all of the input files.
  print("loading from %d data files" % len(input_files))
  missing_symbols = set()
  timestamped_symbol_lists = [
      list(parse_dtrace_symbol_file(path, all_symbols, all_symbols_set,
                                    missing_symbols, opts))
      for path in input_files]

  # Reorder each symbol list by timestamp, then drop the timestamps.
  symbol_lists = []
  for timestamped_symbols_list in timestamped_symbol_lists:
    timestamped_symbols_list.sort()
    symbol_lists.append([symbol for _,symbol in timestamped_symbols_list])

  # Execute the desired order file generation method.
  method = methods.get(opts.method)
  result = list(method(symbol_lists))

  # Report to the user on what percentage of symbols are present in the order
  # file.
  num_ordered_symbols = len(result)
  if all_symbols:
    print("note: order file contains %d/%d symbols (%.2f%%)" % (
      num_ordered_symbols, len(all_symbols),
      100.*num_ordered_symbols/len(all_symbols)), file=sys.stderr)

  if opts.output_unordered_symbols_path:
    ordered_symbols_set = set(result)
    with open(opts.output_unordered_symbols_path, 'w') as f:
      f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))

  # Write the order file.
  with open(opts.output_path, 'w') as f:
    f.write("\n".join(result))
    f.write("\n")

  return 0

# Dispatch table from sub-command name (the first command-line argument) to
# the function implementing it. Each handler is called with the remaining
# arguments as a list, and its return value is used as the exit status.
commands = {
  'clean': clean,
  'merge': merge,
  'dtrace': dtrace,
  'cc1': cc1,
  'gen-order-file': genOrderFile,
}

def main():
  """Run the sub-command named by the first command-line argument.

  The remaining arguments are passed to the handler as a list and the
  handler's return value becomes the process exit status. When the
  sub-command is missing or unknown, a usage message listing the available
  sub-commands is printed to stderr and the process exits with status 1.
  """
  if len(sys.argv) < 2 or sys.argv[1] not in commands:
    print('Usage: %s <command> [args...]\n' % __file__ +
      '\tAvailable commands: %s' % ', '.join(sorted(commands)),
      file=sys.stderr)
    sys.exit(1)
  f = commands[sys.argv[1]]
  sys.exit(f(sys.argv[2:]))

if __name__ == '__main__':
  main()