preprocessing.py 2.04 KB

Raw Blame History Permalink

"""
    Preprocess Mel-spectrogram images before they are used for training.
    @FUNCTION create_training_data : function that preprocesses the images
"""
import os
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pickle

# Root directory of the dataset; create_training_data() joins each category
# name onto it with os.path.join to locate that category's image folder.
dataset_path = "C:/Users/nokh9/Desktop/dog_sound_mel/mobile_net_" # path where the data is stored
# Class names; a name's position in this list is used as its numeric label.
CATEGORIES = ["barking", "howling", "whimpering"] # categories for the emotions

# Filled by create_training_data() with [image_array, class_index] pairs.
training_data = []

"""
    Preprocess the images
    @brief : preprocess images for training
"""
def _load_resized_image(image_path, width, height):
    """Read ``image_path`` as a 3-channel image and resize it.

    Returns the resized array (``height`` rows x ``width`` columns), or
    ``None`` when the file is missing, cannot be decoded, or OpenCV fails
    while processing it.
    """
    try:
        image_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image_array is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # rather than raising.
            return None
        # dsize is (width, height) -- the reverse of NumPy's (rows, cols).
        return cv2.resize(image_array, (width, height))
    except cv2.error as err:
        print("skipped " + image_path + ": " + str(err))
        return None


def create_training_data():
    """Load every category's spectrogram images into ``training_data``.

    For each name in ``CATEGORIES`` the folder ``dataset_path/<category>`` is
    listed, and the files ``<category>_1.jpg`` .. ``<category>_N.jpg`` (N being
    the number of entries in that folder) are read as 3-channel colour images,
    resized to 62 rows x 78 columns, and appended to the module-level
    ``training_data`` list as ``[image, class_index]``. ``class_index`` is the
    category's position in ``CATEGORIES``.

    Loading is best effort: files that are missing or cannot be decoded are
    skipped, and the number skipped is printed per category so that gaps in
    the dataset do not go unnoticed.

    Raises:
        OSError: if a category directory cannot be listed.
    """
    # The script later reshapes the stacked data to (-1, 62, 78, 3), i.e.
    # 62 rows x 78 columns per image. cv2.resize expects (width, height), so
    # passing (62, 78) would produce 78x62 arrays whose pixels that reshape
    # silently reorders. Resize to width 78, height 62 instead.
    target_height, target_width = 62, 78

    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(dataset_path, category)
        print('-'*50)
        print(category + " is started...")
        print('-'*50)
        listdir_num = len(os.listdir(path))
        skipped = 0

        # File names are 1-based: <category>_1.jpg ... <category>_N.jpg
        for number in range(1, listdir_num + 1):
            image_path = path + '/' + category + '_' + str(number) + '.jpg'
            new_array = _load_resized_image(
                image_path, target_width, target_height
            )
            if new_array is None:
                skipped += 1
                continue

            training_data.append([new_array, class_num])
            if number % 100 == 0:
                print(str(number) + 'is finished')

        if skipped:
            print(str(skipped) + " of " + str(listdir_num) + " expected "
                  + category + " images were skipped")

# Script entry point: build the dataset and store it as two pickle files.
if __name__ == "__main__":
    create_training_data()
    print(len(training_data))

    # Shuffle so samples are no longer grouped by category. No
    # train/validation split is performed here; only the order is randomized.
    random.shuffle(training_data)

    # Separate the [image, label] pairs into parallel feature/label lists.
    X = [features for features, _ in training_data]
    y = [label for _, label in training_data]

    # NOTE(review): reshape only fixes the container shape. It assumes every
    # image array is already 62 rows x 78 columns x 3 channels; if the arrays
    # are laid out differently (e.g. 78 x 62), pixels are silently reordered.
    # Confirm against the resize performed in create_training_data().
    X = np.array(X).reshape(-1, 62, 78, 3)

    # Save X and y as separate pickle files. The context managers close the
    # files even if pickle.dump raises.
    with open("X_final2.pickle", "wb") as pickle_out:
        pickle.dump(X, pickle_out)

    with open("y_final2.pickle", "wb") as pickle_out:
        pickle.dump(y, pickle_out)