# utils.py 3.07 KB
import r2pipe
import angr
import cle
import json

def loadBinary(path):
    """Return the full contents of the file at ``path`` as a bytes object.

    The file is opened in binary mode, so no text decoding takes place.
    """
    with open(path, mode='rb') as handle:
        contents = handle.read()
    return contents

def getInst(block):
    """Return ``(mnemonic, op_str)`` for the first instruction of ``block``.

    ``block`` must expose ``capstone.insns``; only element 0 is read, so an
    empty instruction list raises ``IndexError``.
    """
    first = block.capstone.insns[0]
    # mnemonic is the opcode text, op_str is the operand text.
    opcode, operands = first.mnemonic, first.op_str
    return opcode, operands

def getInsts(block):
    """Return the mnemonic (opcode) of every instruction in ``block``.

    Instructions are read from ``block.capstone.insns`` and the result keeps
    their order. Operands (``op_str``) are not included.
    """
    return [insn.mnemonic for insn in block.capstone.insns]

def readFunctionInfoFromElf(elfPath):
    """Describe the functions radare2 reports for the file at ``elfPath``.

    Opens ``elfPath`` with r2pipe, runs the ``aaa`` analysis command, and
    parses each non-empty output line of ``pdfj @@fcn`` as one JSON document.

    Returns:
        A list of dicts, one per parsed line, with the keys ``addr`` (int),
        ``size`` (int), ``name`` (taken from the JSON) and ``ops`` (the first
        space-separated token of each ``opcode`` string, in order).

    Raises:
        json.JSONDecodeError: if a non-empty output line is not valid JSON.
        KeyError: if a parsed document lacks ``addr``, ``size``, ``name`` or
            ``ops``.
    """
    r = r2pipe.open(elfPath, flags=['-2'])
    try:
        r.cmd('aaa')
        ret = []
        for line in r.cmd('pdfj @@fcn').split('\n'):
            if line == '':
                continue
            func = json.loads(line)
            addr = int(func['addr'])
            size = int(func['size'])
            name = func['name']
            # Entries without an 'opcode' key are skipped; only the mnemonic
            # (text before the first space) is kept.
            ops = [v['opcode'].split(' ')[0] for v in func['ops'] if 'opcode' in v]
            ret.append({'addr': addr, 'size': size, 'name': name, 'ops': ops})
        return ret
    finally:
        # Always shut the radare2 session down, even when parsing fails, so
        # repeated calls do not accumulate open sessions.
        r.quit()

def getRadareHandlerByOffsetList(binPath, functionOffsets):
    """Open ``binPath`` in radare2 and run analysis at each given offset.

    The file is opened with the flags ``-2 -aarm -b16 -m0x0``. For every
    entry of ``functionOffsets`` the lowest bit is cleared (presumably the
    Thumb-mode bit of the address -- confirm against callers), radare2 seeks
    to the resulting offset, and ``aaa`` is run.

    Returns:
        The open r2pipe handle. It is not closed here, so the caller is
        responsible for closing it.
    """
    r2_flags = ['-2', '-aarm', '-b16', '-m0x0']
    handle = r2pipe.open(binPath, r2_flags)
    for raw_offset in functionOffsets:
        even_offset = raw_offset & ~1
        handle.cmd(f"s {even_offset}")
        handle.cmd('aaa')
    return handle

def readFunctionInfoFromBin(binPath, functionOffsets):
    """Describe the functions radare2 finds in a raw binary.

    A radare2 handle is obtained from ``getRadareHandlerByOffsetList`` (which
    opens ``binPath`` and analyses at each of ``functionOffsets``), then each
    non-empty output line of ``pdfj @@fcn`` is parsed as one JSON document.

    Returns:
        A list of dicts, one per parsed line, with the keys ``offset`` (int,
        from the JSON ``addr`` field), ``size`` (int), ``name`` (always the
        string ``'none'``) and ``ops`` (the first space-separated token of
        each ``opcode`` string, in order). Note the address key is ``offset``
        here, not ``addr``.

    Raises:
        json.JSONDecodeError: if a non-empty output line is not valid JSON.
        KeyError: if a parsed document lacks ``addr``, ``size`` or ``ops``.
    """
    r = getRadareHandlerByOffsetList(binPath, functionOffsets)
    try:
        ret = []
        for line in r.cmd('pdfj @@fcn').split('\n'):
            if line == '':
                continue
            func = json.loads(line)
            addr = int(func['addr'])
            size = int(func['size'])
            # Entries without an 'opcode' key are skipped; only the mnemonic
            # (text before the first space) is kept.
            ops = [v['opcode'].split(' ')[0] for v in func['ops'] if 'opcode' in v]
            ret.append({'offset': addr, 'size': size, 'name': 'none', 'ops': ops})
        return ret
    finally:
        # The handle is used only inside this function; close it so the
        # radare2 session does not outlive the call.
        r.quit()

def levenshtein(s1, s2, debug=False):
    """Return the Levenshtein edit distance between two sequences.

    Works on any two indexable sequences whose items support ``!=`` (strings,
    lists of mnemonics, ...). Insertions, deletions and substitutions each
    cost 1. Only two rows of the dynamic-programming table are kept at a time.

    Args:
        s1: First sequence.
        s2: Second sequence.
        debug: When true, print every computed row (without its leading
            column) as it is finished.
    """
    # Iterate the rows over the longer sequence so each row is as short as
    # possible.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    if len(shorter) == 0:
        return len(longer)

    prev = range(len(shorter) + 1)
    for row_no, item_a in enumerate(longer, start=1):
        cur = [row_no]
        for col, item_b in enumerate(shorter):
            via_insert = prev[col + 1] + 1
            via_delete = cur[col] + 1
            via_subst = prev[col] + (item_a != item_b)
            cur.append(min(via_insert, via_delete, via_subst))

        if debug:
            print(cur[1:])

        prev = cur

    return prev[-1]

def compareFunction(f1, f2):
    """Return a similarity score for two sequences based on edit distance.

    The score is ``1.0 - levenshtein(f1, f2) / max(len(f1), len(f2))``, so
    identical sequences give 1.0 and lower values mean more edits relative to
    the longer input.

    Args:
        f1: First sequence (anything accepted by ``levenshtein``).
        f2: Second sequence.

    Returns:
        A float similarity score. Two empty sequences are treated as
        identical and yield 1.0 instead of dividing by zero.
    """
    longest = max(len(f1), len(f2))
    if longest == 0:
        # Nothing to compare on either side: avoid ZeroDivisionError.
        return 1.0
    return 1.0 - levenshtein(f1, f2) / longest

# readFunctionInfoFromElf('API.ino.elf')