# data_refiner.py (598 bytes)
from utils import *
import file_parser
import random

def refine_files(input, output):
    """Parse every ``.py`` file found under *input* and write it below *output*.

    The paths returned by ``readdir(input)`` are filtered with
    ``is_extension(f, 'py')`` and shuffled. Each file is then read with
    ``read_file``, passed through ``file_parser.parse_block`` and written
    with ``write_block`` to the same relative location under *output*.

    Args:
        input: Root directory of the source files. The name shadows the
            builtin but is kept so existing keyword callers keep working.
        output: Root directory that receives the refined files. Missing
            parent directories are created as needed.

    NOTE(review): ``os`` is not imported explicitly in this module;
    presumably it arrives through ``from utils import *`` -- confirm.
    """
    files = [f for f in readdir(input) if is_extension(f, 'py')]
    random.shuffle(files)

    for p in files:
        lines = read_file(p)

        print("Refining:", p)
        block = file_parser.parse_block(lines)

        # relpath is separator-agnostic and tolerates a trailing separator on
        # `input`, unlike slicing the result of p.split(input).
        filepath = os.path.relpath(p, input)
        path = os.path.join(output, filepath)

        # Create the full parent directory of the target file. Splitting on
        # '\\' only worked on Windows for exactly one level of nesting and
        # otherwise created a directory at the file's own path.
        parent = os.path.dirname(path)
        if parent:  # empty when the target sits directly in the cwd
            os.makedirs(parent, exist_ok=True)
        write_block(path, block)

    print("Done generating Refined Dataset")