# utils.py 7.06 KB

# Raw Blame History Permalink

from block import Block
import bisect
import os
import re
import random

# Keyword groups consulted by create_block_from_line to classify a source line.
TYPE_CLASS = ['class']
TYPE_DEF = ['def']
TYPE_IMPORT = ['from', 'import']
# NOTE: the name is misspelled ("CONDITOIN"); create_block_from_line refers to
# it by this exact spelling, so it is left unchanged.
TYPE_CONDITOIN = ['if', 'elif', 'else', 'for', 'while', 'with']
# Triple-quote delimiters. Despite the name, the same tokens also delimit
# ordinary multi-line strings, which the helpers below try to tell apart.
multi_line_comments = ["'''", '"""']

def select_value(x):
    """Return the element at index 1 of *x* (the value of a (key, value) pair)."""
    value = x[1]
    return value

def threshold_dict(d, val):
    """Split the items of *d* in two around the threshold *val*.

    The items are ordered by value first. The first returned list holds the
    (key, value) pairs whose value is below *val*; the second holds the pairs
    whose value is greater than or equal to *val*.
    """
    ordered = sorted(d.items(), key=select_value)
    values = [pair[1] for pair in ordered]
    cut = bisect.bisect_left(values, val)
    below = ordered[:cut]
    at_or_above = ordered[cut:]
    return below, at_or_above

def is_number(s):
    """Return True when *s* looks like a plain decimal number.

    An optional leading '-' and at most one '.' are accepted; every other
    character must be a digit. An empty string is not a number.
    """
    if not s: # indexing s[0] below would raise IndexError on ''
        return False
    if s[0] == '-':
        s = s[1:]
    # Drop a single decimal point; whatever is left must be all digits.
    return s.replace('.', '', 1).isdigit()

def is_extension(f, ext):
    """Return True when the extension of path *f* (without the dot) equals *ext*."""
    _, suffix = os.path.splitext(f)
    without_dot = suffix[1:]
    return without_dot == ext

def _readdir_r(dirpath):
    """Return the entries of *dirpath*, each joined onto *dirpath*.

    Used by readdir to expand one directory during its traversal.
    """
    return [os.path.join(dirpath, entry) for entry in os.listdir(dirpath)]

def readdir(path):
    """Collect every non-directory path found under *path*.

    Directories are expanded breadth-first, one level at a time. When *path*
    itself is not a directory the result is simply ``[path]``.
    """
    files = []
    level = [path]

    while level:
        deeper = []
        for entry in level:
            if os.path.isdir(entry):
                # Children are visited only after the whole current level.
                deeper.extend(_readdir_r(entry))
            else:
                files.append(entry)
        level = deeper

    return files

def remove_string(line):
    """Return *line* with every string literal (quotes included) removed.

    Both single-character quotes and triple quotes are recognised. A literal
    that is still open at the end of the line swallows the rest of the line.
    Characters outside of string literals are kept unchanged.
    """
    kept = []
    delimiter = None # quote token of the literal currently open, if any
    i = 0
    L = len(line)

    while i < L:
        c = line[i]

        if delimiter is None:
            if c == '\'' or c == '"':
                # A tripled quote opens a triple-quoted literal. The whole
                # token is consumed so its last quote is not read again as
                # the start of a new single-quoted literal.
                triple = c * 3
                delimiter = triple if line.startswith(triple, i) else c
                i += len(delimiter)
                continue

            kept.append(c)
            i += 1
            continue

        if c == '\\':
            i += 2 # an escaped character can never close the literal
            continue

        if line.startswith(delimiter, i):
            i += len(delimiter)
            delimiter = None
            continue

        i += 1 # ordinary character inside the literal: drop it

    return ''.join(kept)

def using_multi_string(line, index):
    """Tell whether a triple quote occurs after the start of the stripped line.

    The search begins at *index* in the stripped line. A delimiter found at
    position 0 does not count, so a line that merely starts with a triple
    quote is not reported.
    """
    stripped = line.strip()
    return any(stripped.find(quote, index) > 0 for quote in multi_line_comments)

def _find_comment_start(line):
    """Return the index of the first '#' outside a quoted string, or -1."""
    quote = None # quote character of the string literal currently open
    i = 0
    L = len(line)

    while i < L:
        c = line[i]
        if quote is None:
            if c == '#':
                return i
            if c == '\'' or c == '"':
                quote = c
        elif c == '\\':
            i += 1 # the escaped character can never close the string
        elif c == quote:
            quote = None
        i += 1

    return -1

def remove_unnecessary_comments(lines):
    """Return *lines* without '#' comments, blank lines and block comments.

    A line that starts with a triple quote is treated as a block comment and
    dropped, together with the following lines up to the closing delimiter.
    A line holding a triple quote elsewhere is treated as code using a
    multi-line string: it and the lines up to the closing delimiter are kept
    verbatim. Every other line loses its '#' comment and trailing whitespace
    and is dropped when nothing remains.
    """
    # Warning : cannot detect all multi-line comments, because it exactly is multi-line string.

    #TODO: multi line string parser will not work well when using strings (and comments, also) more than one.
    # ex) a = ''' d ''' + '''
    #     abc ''' + '''
    #     x'''

    result = []
    multi_line = False # inside a block comment: lines are dropped
    multi_string = False # inside a multi-line string: lines are kept as-is
    strCh = None # delimiter that closes the current comment/string

    for line in lines:
        if multi_string:
            if strCh in line:
                multi_string = False
                strCh = None

            result.append(line)
            continue

        if multi_line: # parsing multi-line comments
            if strCh in line:
                multi_line = False
                strCh = None
            continue

        if using_multi_string(line, 0):
            # Pick whichever delimiter appears first in the line.
            i1 = line.find(multi_line_comments[0])
            i2 = line.find(multi_line_comments[1])

            if i1 < 0:
                i1 = len(line) + 1
            if i2 < 0:
                i2 = len(line) + 1

            if i1 < i2:
                strCh = multi_line_comments[0]
            else:
                strCh = multi_line_comments[1]

            result.append(line)
            if line.count(strCh) % 2 != 0: # opened here but not closed
                multi_string = True
            continue

        code = line.strip()

        if code[:3] in multi_line_comments: # detect in-out of multi-line comments
            if code.count(code[:3]) % 2 != 0: # comment count in odd numbers (start or end of comment is in the line)
                multi_line = True
                strCh = code[:3]
            continue

        # Only a '#' outside of a string literal starts a comment; cutting at
        # a '#' inside quotes would truncate the code itself.
        comment_index = _find_comment_start(line)
        if comment_index >= 0: # one line comment found
            line = line[:comment_index]
        line = line.rstrip() # remove rightmost spaces

        if len(line) == 0: # no code in this line
            continue

        result.append(line) # add to results

    return result

def create_block_from_line(line):
    """Build a Block for *line*, typed by the first keyword found in it.

    String literals are removed before the keywords are looked up, so text
    inside quotes never influences the classification. The Block always
    receives the original, unmodified *line*.
    """
    _line = remove_string(line)
    _line = _line.strip()

    # NOTE(review): any '@' outside a string selects TYPE_FACTORY, including
    # the matrix-multiplication operator - confirm this is intended.
    if '@' in _line:
        return Block('TYPE_FACTORY', line)

    # A colon is split off from the word it follows; otherwise "else:" stays
    # a single token and never matches the 'else' keyword.
    keywords = _line.replace(':', ' ').split()

    for key in keywords:
        if key in TYPE_IMPORT:
            return Block('TYPE_IMPORT', line)

        if key in TYPE_CLASS:
            return Block('TYPE_CLASS', line)

        if key in TYPE_DEF:
            return Block('TYPE_DEF', line)

        if key in TYPE_CONDITOIN:
            return Block('TYPE_CONDITION', line)

    return Block('TYPE_NORMAL', line)

def create_dictionary(arr):
    """Map each name in *arr* to its position in the sequence.

    When a name occurs more than once, its last position is the one stored.
    """
    return {name: position for position, name in enumerate(arr)}

def create_mapper(L):
    """Return the integers 0 .. L-1 in a random order, as a new list."""
    order = list(range(L))
    random.shuffle(order)
    return list(order)

def read_file(path):
    """Return the lines of the UTF-8 text file at *path*.

    Line terminators are kept, as with ``readlines``.
    """
    # The context manager closes the file even when reading raises.
    with open(path, 'r', encoding='utf8') as f:
        return f.readlines()

def write_file(path, lines):
    """Write *lines* to the UTF-8 text file at *path*, one entry per line.

    An entry that does not already end with a newline gets one appended, so
    consecutive entries never run together.
    """
    # The context manager closes the file even when a write raises.
    with open(path, 'w', encoding='utf8') as f:
        for line in lines:
            # Test the end of the entry: a newline in the middle of the text
            # does not terminate it.
            if line.endswith('\n'):
                f.write(line)
            else:
                f.write(line + '\n')

def write_block(path, block):
    """Write ``str(block)`` to the UTF-8 text file at *path*."""
    # The context manager closes the file even when str() or the write raises.
    with open(path, 'w', encoding='utf8') as f:
        f.write(str(block))

def shuffle_block(block):
    """Randomly reorder the children of *block*, in place and recursively.

    Only blocks whose type is 'TYPE_CLASS' or 'TYPE_ROOT' are touched; any
    other block, including everything nested in it, is left as it is.
    """
    if block.blockType not in ('TYPE_CLASS', 'TYPE_ROOT'):
        return

    # Children are shuffled internally before they are reordered themselves.
    for child in block.blocks:
        shuffle_block(child)

    random.shuffle(block.blocks)

def detect_multi_string(line, stack):
    """Update *stack* with the triple quotes in *line*; True while one is open.

    *stack* is modified in place. A delimiter is pushed when the stack is
    empty and popped when it equals the delimiter on top; any other
    delimiter is ignored.
    """
    for start in range(len(line) - 2):
        token = line[start:start + 3]
        if token not in multi_line_comments:
            continue

        if not stack:
            stack.append(token)
        elif stack[-1] == token:
            stack.pop()

    return bool(stack)

def detect_parenthesis(line, stack):
    """Track round brackets in *line*; True while any of them is still open.

    *stack* is modified in place: one entry is pushed per '(' and one is
    removed per ')'. Brackets inside string literals are ignored. A ')' with
    no matching '(' on the stack is skipped.
    """
    line = remove_string(line)

    for c in line:
        if c == '(':
            stack.append(1)
        elif c == ')' and stack: # popping an empty stack would raise IndexError
            stack.pop()

    return len(stack) > 0

def detect_multi_line_code(line):
    """Return True when *line*, ignoring trailing whitespace, ends with a backslash."""
    return line.rstrip().endswith('\\')

def search_keyword(path, keyword, fast_detect=False):
    """Collect the lines of every '.py' file under *path* that mention *keyword*.

    Comments are removed from each file before it is searched. With
    *fast_detect* a plain substring test is used. Otherwise the line is cut
    into words at every character that is not a letter, digit, '_' or '@',
    and *keyword* has to equal one of those words.
    """
    matches = list()
    sources = [f for f in readdir(path) if is_extension(f, 'py')]

    for source in sources:
        for line in remove_unnecessary_comments(read_file(source)):
            if fast_detect:
                found = keyword in line
            else:
                found = keyword in re.split('[^A-Za-z_@0-9]', line)

            if found:
                matches.append(line)

    return matches